{ "cells": [ { "cell_type": "raw", "metadata": { "ExecuteTime": { "end_time": "2020-01-02T10:50:31.982740Z", "start_time": "2020-01-02T10:50:31.976911Z" } }, "source": [ "" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Machine Learning with vaex.ml\n", "\n", "If you want to try out this notebook with a live Python kernel, use mybinder:\n", "\n", "\"https://mybinder.org/badge_logo.svg\"\n", "\n", "\n", "The `vaex.ml` package brings some machine learning algorithms to `vaex`. If you installed the individual subpackages (`vaex-core`, `vaex-hdf5`, ...) instead of the `vaex` metapackage, you may need to install it by running `pip install vaex-ml`, or `conda install -c conda-forge vaex-ml`.\n", "\n", "The API of `vaex.ml` stays close to that of [scikit-learn](https://scikit-learn.org/stable/), while providing better performance and the ability to efficiently perform operations on data that is larger than the available RAM. This page is an overview and a brief introduction to the capabilities offered by `vaex.ml`." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2021-04-13T10:14:59.411079Z", "start_time": "2021-04-13T10:14:57.668212Z" } }, "outputs": [], "source": [ "import vaex\n", "vaex.multithreading.thread_count_default = 8\n", "import vaex.ml\n", "\n", "import numpy as np\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We will use the well known [Iris flower](https://en.wikipedia.org/wiki/Iris_flower_data_set) and Titanic passenger list datasets, two classical datasets for machine learning demonstrations." ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2021-04-13T10:15:00.780624Z", "start_time": "2021-04-13T10:14:59.413189Z" } }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
# sepal_length sepal_width petal_length petal_width class_
0 5.9 3.0 4.2 1.5 1
1 6.1 3.0 4.6 1.4 1
2 6.6 2.9 4.6 1.3 1
3 6.7 3.3 5.7 2.1 2
4 5.5 4.2 1.4 0.2 0
... ... ... ... ... ...
1455.2 3.4 1.4 0.2 0
1465.1 3.8 1.6 0.2 0
1475.8 2.6 4.0 1.2 1
1485.7 3.8 1.7 0.3 0
1496.2 2.9 4.3 1.3 1
" ], "text/plain": [ "# sepal_length sepal_width petal_length petal_width class_\n", "0 5.9 3.0 4.2 1.5 1\n", "1 6.1 3.0 4.6 1.4 1\n", "2 6.6 2.9 4.6 1.3 1\n", "3 6.7 3.3 5.7 2.1 2\n", "4 5.5 4.2 1.4 0.2 0\n", "... ... ... ... ... ...\n", "145 5.2 3.4 1.4 0.2 0\n", "146 5.1 3.8 1.6 0.2 0\n", "147 5.8 2.6 4.0 1.2 1\n", "148 5.7 3.8 1.7 0.3 0\n", "149 6.2 2.9 4.3 1.3 1" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = vaex.datasets.iris()\n", "df" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2021-04-13T10:15:01.207334Z", "start_time": "2021-04-13T10:15:01.016134Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/jovan/vaex/packages/vaex-core/vaex/viz/mpl.py:205: UserWarning: `scatter` is deprecated and it will be removed in version 5.x. Please use `df.viz.scatter` instead.\n", " warnings.warn('`scatter` is deprecated and it will be removed in version 5.x. Please use `df.viz.scatter` instead.')\n" ] }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYMAAAEHCAYAAABMRSrcAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAABBu0lEQVR4nO3dd5hcZfXA8e+ZvrMlm7LplRCSkBBCCKH3TkCKKCAgReoPEEQFFQUVUERAARFEuiBIlyYCUgKEkkInBBKSkJ5N3T7tnt8fd7K7szvbZ3a2nM/zzLM779xy7kLumXvv+75HVBVjjDG9myfXARhjjMk9SwbGGGMsGRhjjLFkYIwxBksGxhhjAF+uA2iPAQMG6OjRo3MdhjHGdCvz5s1br6ol6T7rlslg9OjRzJ07N9dhGGNMtyIiy5r6zG4TGWOMsWRgjDHGkoExxhgsGRhjjMGSgTGmm1FNoPFlqLOxfes7G9H4UlTj9doq0PgSVCOZCrPbyWpvIhEZATwADAYc4E5VvbnBMvsB/waWJJueVNXfZjMuY0z3pDWvoGW/BKcaSKCBXZDimxBP35bXdcrQzT+B6GzACxJAC38FsTlQ/RSID1A0/3wk/1xEJNuH06Vku2tpHPixqs4XkUJgnoi8rKqfN1juTVU9MsuxGGO6MY19jm6+FKipa4y+h246B+n/WMvrb74IovOAaLKhGsouB7xumybbK29HPQOR8HEZPoKuLau3iVR1tarOT/5eDiwAhmVzn8aYnkkr76f2RF4rDrGFaHxR8+vGV0B0fpr1E43btBoq/9axYLuhTntmICKjgZ2A99J8vLuIfCQi/xGRSU2sf46IzBWRuaWlpdkM1RjTFSWW495tbkB8kFjb/LpOKUig9ftyet85plOSgYgUAE8Al6hqWYOP5wOjVHVH4Fbg6XTbUNU7VXW6qk4vKUk7mtoY05MFdgfSnNA1Bv6Jza/rG1d3G6g1/FPaFFpPkPVkICJ+3ETwkKo+2fBzVS1T1Yrk7y8AfhEZkO24jDHdi+SfDJ4iUh915kH4VMTTr/l1PQVQcK67fC0vSD4QrL8kkIcUXpapsLuNbPcmEuBuYIGq3tTEMoOBtaqqIjIDN0FtyGZcxpjuRzz9oP+/0Yq/QuR18BQj+WdA6KhWre8puBD1jUUr74LEBgjuhRRcCPFlaOVtEP8G/JORgh8i/u2yezBdkGSzBrKI7AW8CXxC3c2+XwAjAVT1DhG5EDgft+dRNXCpqs5ubrvTp09Xm6jOGGPaRkTmqer0dJ9l9cpAVd/Cve5qbpm/AH/JZhzGmMzR+Aq0/I8QfRMkDOGTkPxzcO8IZ45T9SiUXwdaAfggdDyeYhuClC3dcgprY0xuqLMR3fBt0C2A456oK/6GxhYifW/J2H6c6qeg7Jf1WuJQ8wjOxg14+t2Wsf2YOjYdhTGm1bTqEdAqUrt41kDkNTT+TeZ2VPb79O3Rl3Gcqsztx9SyZGCMab3oB0Ca+XvED/EvMrcf3dL0Zy0MMDPtY8nAGNN6/nFAmmcDmgDvyMztR/Ka/syXwf2YWpYMjDGtJuGT3auAFH7wT0D8EzK3o/xz07f7JuPxFGduP6aWJQNjTKuJdxjS937wbYc7wZsfQgcjfe/K6H48BedD3qmknKJ8U6HfIxndj6mT1XEG2WLjDIzJPXUqQAJIW+b8aSPHccBZA55+eDyhrO2nt8jZOANjTM+jGoXqp9Ga/4KnD4RPAv/OEHkZrX4aECTvWAgeBLE5aNU/wSlHQkdA3lEQ/xqt+gckVkFgLyT8XcRTmHZfHo8HPEPr7bsGrXoSIq+Apz8SPgUJ7Jg+zsjbaNW/QKuRvCMhNBORxqc8jX+DVt0P8cXgn4aET0a8/TPxp8oYdcrRqkch+hZ4hyLh7yP+8Rndh10ZGGNaTTWKbjgp2aOnGndMaRC8YyCxDEh2+5QweEdDfElyOdyHwjLI/aZPFLd7agg8/ZABT7VYoEa1Gt3wHYgvT9134c/
x5J+UsqxTdgNU/8OdjhqAMAR2RPreg4i3bpvReejGM4EY7iQIAZAw0v8JxDei3X+nTFJnE7r+GHA24dZy8AABpPhGJHRwm7bV3JWBPTMwxrRe9XOQ2JoIABSogcQCahMBuGMR4p/XWw73xOwsdZevHadQA856tPLuFnetVY+68wc13Hf5dahTWbdcYiVU3V8vEeDGFv3IndOo/ja3XJHc3tYSmFHQMrT8xhbj6Sxa+Xdw1lNX1McBatAtV6CayNh+LBkYY1pNIy81OMlmQhRqXml5sZqXSKlytpV4IfZx3fvIeyDpTm1VaOTV2nfqlEMi3UA5x70d01XUvIJ75dJQFBJL0rS3jyUDY0zrefqSldOGp6iV+07HgfrPHDwFpI/RB/W7pUqAJqdOk/yW4+ksnj7p2zUBUpC53WRsS8aYHk/yTiRtgZnWb4HGp508JHxay2uGTyG1HkFye54B4KtXIDG4L26314Z8SN7xdWtKEEKH0Ph4QhA+ucV4Oov7t2l43F7wT0K8gzO2H0sGxphWk8COUHg5EHS/lUo+eAZB4W+S7+u9Cq8GT4m7jBS46xRcBr5tkw+TC4AAhL8HoSNa3ndwNyj4Yb19h8EzFOl7N27plORyEkT63QPSt96+86DoGsQ3JnWbRVcnq5qF6uIJHYLkn5m5P1pHhWa6fyMCyePOA99YpDhzEwOC9SYyxrSDOuUQm++enPw7IeJxu5xG5wACgemIBNwHnLEPQCvBvzPiKUBV3YfLiXXg3wHxtq2woTpb3G1KMfh3TEkEKctp3I1RI8l9h5veZnyR20vJvx3iHdameDqLJtZD7BPwDgTf9k0ed3Oa601kycAY0yWpKsTmJU+AQyG4f5MD3DSxEmpeA/FB8OAuN06gKapRiLzmjrnw7+AmrXac5FvLBp0ZY7oV1Yjb/z/+GWjcnQ9J8qHfw436/zuVd0P5n3GfRwhwLdrn93jyjsxB5K2n8W/QjSe53XA15iYy32Tod7f7PKOT2TMDY0yXoxV3ut1FtQq373+lOx5hy49Tl4t9BeU3406rXYM7ZiACW36OOhs7P/A20M2XgrPBPTai7rHGPnLHFeSAJQNjTNdT/SSN6yY4EPsMdTbVtmjNc6Ttgy8eqPlfNiPsEHU2QnwBqUWCACJQ9WQuQrJkYIzpipoaWSug9U+gcdyRyA2oNrONLkAdmi4PH2+iPbssGRhjup7QTNKOZ/CNSXk4LKFD0y+HQnC/LAXXceIdAN5RaT4JuJP55YAlA2NMlyMFF4BvlDuWAIA8kCKkzw2py/mnuLOmEsI9nXlxJ6/7aUYHZGWDFN8IUogbO+6x+kYh+efnJB7rTWSM6XLEUwD9n4bIq2j0Q7cHUehIJM20FZ6in6N530JrXsIdZTwT8W3T6TG3lfgnQMlrUPMsGl+BBKZC8ACkUSW5zmHJwBjT6TSxxp2J0zcWSdY7VmcjJFaCdyTi6YOIHw3siXiHgmdQbSJQjUL8K/dKIdnNVPyTEP+k1H2ouhO5aQR826VMXZ0rGl8OWga+cYgE3GMKn9zk04POZMnAGNNp1ClDN/8QovOStZQTaP4PIf4l1DzvTh6nMTTveHdiusq73P73GkWD+0Bgf6j4PaCgcdQ3Hun7V8Q7MHU/8a/RTedDYrU7qylBKL4BCe6Vi8NGE2vRTRe4xyleQNDCq/CEj85JPOnYCGRjTKdxNp4J0fdI7Q7qw+1ZU7/Nj9tLKN6gLUFqd0wv+MbhGfBMbYtqDC3d1+3Dn9LTKA8p+Y97pdGJVBXd8K1kQaD6PZxCSP8H3ecencSK2xhjck4TpRB9n8bjAuJp2rZWHmvY1rBffgLiy9DYwrqmyFvJmgsNv+jG0arH2hN6x8QXJusmNOzqGkUrH+j8eJpgycAY0zmcjclbQxkm3mQlsK372dBgLMJWsWTJzU7mbCD9lNqOexuri7BkYIzpHA2mj84YjYF/ct37wM40voIACCOBPbMTQ3P8k9wYGwkmay90DZY
MjDGdQiQAhT8jtVCLr67WQe3pyINbXyBMXR8XcdsoJmWQmeRBwflIvWpg4hsDeUe6n9UKgW90sphN5xJPMRScS+pxB8DTHwmf1OnxNMV6ExljOo0nfALqHYlW3gWJNRDcA8k/G5wNaMUdbpdR/2Sk4FyQkDthXXQO+EYi+eeAbxu08l6I/A88/ZDw6UjogEb7kaJrIbArWvUwaA2EZiL5pzY5BXa2eQouRH0T0ar73dtlwYOQ/NOR+uU6c8x6ExljTC+Rs95EIjJCRF4TkQUi8pmIXJxmGRGRW0RkkYh8LCLTshmTMSY9p/pFnNKDcNZMxCk9EKf6eTQ6F2f9cThrtsdZtztOxd04sUU4G8/AWTMJZ+3OOGW/x4mvwtl8Kc6aHXDWTMHZ/JMuP4V0Jmh8Bc6mc3HWTMZZOxVny1WoU9m6dVVxKv+Bs25P92++/lto5J0sR9y0rF4ZiMgQYIiqzheRQmAecIyqfl5vmSOAi4AjgF2Bm1V11+a2a1cGxmSWU/0ibLkMtybAVgHc7pn1H36GcB/OxqjruhnEvacfp647qA+8w5EBLyDSM+9Gq1OOlh4Mupm6B9YBt1B9v0darFjmVPwVKv6GW4NhqxDS7z4kkJ3vxDm7MlDV1ao6P/l7ObAAaFhg9GjgAXW9CxQnk4gxprNU3EBqIgCI0rj/f02yvf6XyK2FZeqPC4iDUwqR1zMcaNeh1U8mi+/U77kUhdgXbmGe5tbVKFTeSWoiAKhBK27OcKSt02m9iURkNLAT8F6Dj4YBy+u9X0HjhGGMyabEysxvU6vdB8I9VewzGidQAGn5uJ31yZoLaeTob9YpyUBECoAngEtUtazhx2lWafRXEpFzRGSuiMwtLS3NRpjG9F7ZmO5Z8qAbzB7abr7x1E4/XZ/Q8nF7+jdd28abpfEYLch6MhB3PtYngIdUNV09txVA/QrXw4FVDRdS1TtVdbqqTi8pKclOsMb0Vvk/ovGJLYg7H1B9oTRtgeSr/ihbH3iKIdi422dPIeFvg2x9XrKV3z2Z+3dqfl0JQvh0UsceAISQwkb9bDpFtnsTCXA3sEBVb2pisWeA7yd7Fe0GbFHVrjNG25hewBP+FhRdDZ7kFYJnIBRdhfT9O3jHum1SCAXnQL/Hwb8z7kkwCHnHwYDnILg/bkLwufPy93s0Z3PzdwbxFCP9H4PArrinUr9bc6HfAy0+PAaQgouh4AKQ5IA572ik761IYEZW424yniz3JtoLeBP4hLqnLL8ARgKo6h3JhPEX4DCgCjhDVZvtKmS9iYzJHlUHEU+r2kBSTnxbzyetORn2JOn+Fm1dv+HfNxua602U1T5fqvoWTd8Z27qMAhdkMw5jTOvVPylpYgNadR9E3kG9Q5H8H6De4bDl5+4MpBJE80/BU3BRct0GiSHyGlr1IGgFhI5AwifUFrPpKjS20B0RHf8aAlOR/B+0eZrrjp7IOyMRtBiDjUA2xqSjiXXo+m+5J3KiuN/r/LgX+Q2ml/bvgqf/QylNTtmNUPUAdd0n3fmBpP9j7j3zLkAjs9FN5+Een4M7V1Ie0v9xd46jHsbqGRhj2kwrbndLNBLd2pL8vWGdASA2BydW1yVSE2uh6l5S+9HXQGIZVD+ftZjbQlXRsitxu4duvYsdB61Ay/+Yw8hyw5KBMSa96CzSnvibUv3vut9j89PXLtBqNPJqh0PLCC2HRKOOi4Amq7H1LpYMjDHpefq2bfn6YxWkqXW9bk+lrkBCNHkKlKJODaUrsGRgjElL8n9A437w6Sp2JdvzTqx7G9jF7YraqP+IHwmfSFcgEnDrHtDw+UUe5J+Ri5ByypKBMSa94GFQcBYQTJ7YQ+5gquBxDRb0Qd978XjqOieKeJF+94N3hDsSWQpA8qHP7xH/dp15FM2SoqsguAd1xxiAvOOQ8Cm5Dq3TWW8iY0yz1Clzi7p7BiK+UQA4ThlUP+dOqxA8GI8n/fdKVXXX1Urw75Cz4jIt0cQqd34m31jE0y/
X4WRNzsYZGGO6poqaFaxdczV+XU3cN4PRQy8HiULFzRD/HLwToPASPJ4CxFPk3vapRxJL3ambnTjoRmBA2v2ICPgntBiP45RB+Z8h8SX4doCCi9zup9E3IfYJeIZB6DDEE067vsYXQc0r7kPr4KGIb3j65ZzNUPMft0h9YAb4d0FE3HEFLYwtUFWIzXHHV3j6u+Mm6pXb7O7sysCYXmZl6ZMMiv8s5W5+xPER9CpCol6rB/o9iSewfW2LqqJbfgY1L+JOXe0HBCm+GQnt3654nOjHsPG7pE4F7QPPKHBW405MEAYJIP0fRnxjU9cvvxkq78bt+eQBBIp+iSd8QspyGp2HbvpBcrbQGvf2lX860veOFmsuqMbRTedCbJ47GyshEA/S9+6s1R7IBhtnYIyp1T96BQKI1L2CnjhoosGSDmw+O7Up8hpE/os7fsBhay0D3fIjVNNN59wKm84hNRGAWw9hMW4iwP2pW9DNP0lZSmOfJxPB1noKUTemsmvQRN3sxqoOuvmiZP2BakDd36NzoPqpFkPUqicgOje5vrrb0Ep080XJqSi6P0sGxvQi6zfPxe9J0HAKna1JoRGnFMepO9lp9dPJE2JDnnb1zXecaPI2U2soxL9MKaepNf+hblBcg3gi/6t7G1+Q/EbfUDVa/XjLu655gsaFaHCfhcS/aHn9bsCSgTG9SYfnj+tqE9A1F480+L2pW+KtOaaudtyZZ8nAmF5kQJ/pRB1voyJbqk0U3vIMTOkpJHnHgKR7iKsQ2K3N8Xg8AZD+rVxawDc+pbePhA7HraXQkAPBA+ve+ia4XVsbyUPyjm95z3nH03jMBW6XWV/LD8i7A0sGxvQyG4N/QKlLAKpQk/CBNBxQ5oW+d6U2BfeD0EzcIjfe5M+Q+wC5vZPP9b2LxqciH3jGJxOPx/0pxUjxjSlLiX+iW2OhthBP0H0V/Rrx1vVwEvEgxbclE0KeG7vkuWMM8o5pOca8YyG4m7sOXncbko8U39YlZhzNBOtNZEwvVB1Zx6o1v8XnrCLh343RQ34MEoOKv0D8U/BNhIKL8DTVlTP2OUTeBE8BhA7vcN98x6mEilvcMQn+KZD/f+6YhOhst2updyiEDkUkTZlJQONLks8I/O5yTZTxVKc82bV0Y7Jr6U6trkHgdi39wH02Utu1tKC9h5wTzfUmsmRgTDelGoHYx+4cO77JWSsos7mmmgWlpQwqKGCbvj13QFZvYIPOjOlhnOr/QNkvcB9sOiDF0PfOjE71oKrc9O7b3DV/LgGvl5jjMHFACXcddSx987pWgRrTcT3jZpcxvYjGv4Ytl7vdGrXC7erprEI3fh/VWMb28/xXC7nng3lEEgnKo1Fq4nE+XbeWH774XMb2YboOSwbGdDNa9SiQ7qQfce+xZ8hdH8yjOp5azyDmOMxZtZLSqsqM7cd0DZYMjOlunPVAw9HCWz/bnLHdbKpON0gLfB4P5ZFIxvZjugZLBsZ0MxLcD0jTy0fjbg+ZDNlv9Bh8aWYjDXp9jOpTnLH9mK7BkoEx3U3oUPCPw+3jv1UehE9BvEMytpsLd9mN4lCIgNcdf+ARIeTzcc3+B+JtYspq031ZbyJjuhkRP/R70J08reZ5kDAS/p47ICyDSvLzefHk07jvww+YveIbhhcW8YNp09lh4KCM7sd0DTbOwJhuLO44eETwtDDGwFEl4Tj4vU2VrWy7WCKB1+Npcd+dRTUOCNJoJLXZysYZGNPDfLVhA1e8+jLz16zCK8IR47bjN/sdSFEwdYRudSzGNW++xpMLPifmOEwqGcjV+x/ElEHpR+i2xuel67ji1Zf5ZN1afB4PR4+fyJX77E9+IDdVzDT+DVp2ZXLWVEGDByJ9ftOjK5Zlg10ZGNPNbKiq4sAH7qE8Gqmdh9Pv8TJhwACePuHklJHIpz/9BO+tXE4kUdf7KOz38+LJpzG8qO1VulaXl3PIg/dRGaubNjrg9TJt8FD++e3vtvuY2kudCrT0ILfqWm1
NBB94RyADXrCrhAasuI0xPcijn39CJBFPmZA55iRYvGkjH65ZXdu2ZPMm3l+5IiURgHt7594P57dr3w9+8iExJ3V70USCD9eu5ssN69u1zQ6peT5Zp6B+gZk4OOsg+nbnx9ONtToZiMhxIvKViGwRkTIRKReRsmwGZ4xp7Iv16xud4MGdmGLJ5k2175du3oTf2/ifeMxx+GJ9aaP21lhQWko0zb59Hk/KvjuLxheRvuhMDOJLOzucbq0tVwbXA99S1T6qWqSqhapalK3AjDHp7TBwECFf48d9jirj+9dN2zyuX/+0J26/x8PUwe3rgjpl0GCCaR5CxxIO4/q1ti5B5oh/Yvr6CuIDX+bmaeoN2pIM1qrqgqxFYoxple9OmkzY78db79lA0OtlyqAhTKrX7XN4UR8OHDOWkLcucQgQ8vn4/pSd2rXvk6fsSMjnS6n7FfR62WvkqNzMaBo6AqQIt8bAVgHwjobArp0fTzfW4gNkETku+eu+wGDgadwq2ACo6pPZCq4p9gDZ9HYry8q4etZrvLFsKUGfl+MnTuIne+xFyOdPWS6WSHDr++/w0CcfURmLsfvwEfxy7/0Y24Fv8Us2b+KaWa8xe/k35Pn8nDh5ChfvujvBNFcrnUET69Dy66DmFfeKIHQUUvjTbldroDN0qJ6BiNzbzMeqqmd2JLj2sGRgjDFt16FxBqp6RnIje6pqyuN5EdmzhR3fAxwJrFPVyWk+3w/4N7Ak2fSkqv62pZiM6QnmrV7JX+e8x7LNm5k6eAgXztiN0cV9W7Xu+ytX8P2nHiPquL1oBoTDvHLqmTyzcAH/+uwTEqocN2F7TtlhR15duoR7P5zH5poaDhm7LWdPm05xqHE9gv8s+pJrZr3G+qoq+uWFuWLvfZk5tg9acYfbh987BMk/G/yT0cq7oeZl8BQg4e9DaGbWiuuYztHqcQYiMl9Vp7XU1uDzfYAK4IFmksFPVPXItgRtVwamu3tp8Vdc8t8XqElOEe0RIc/n48nvnsy4/s3fwlm8YT0HP3R/2s/yfL7aaadDPh99giG21NRQk3DbAl4vA/LCvHDyaRQF62oWP/zpx1zx6ssp2xKUB/Z7hd0HfgNsnco66NYR1gpg61iDPAifgKfoF236G5jO16FxBiKyu4j8GCgRkUvrvX5N6lObRlR1FrCxPUEb01OpKle9/r/aRABuT6CqWIzrZ7/Z4vonPfVYk5/Vrz9QE4+ztrKiNhGAOyZgY001//r045T1rpn1WuM4Ea75YBp1iQAgArqRukQAUA1VD6OJtS3Gbrqu1vQmCgAFuLeUCuu9yoDjMxDD7iLykYj8R0QmZWB7xnRpG6ur2VRT06hdgbmrVra4/vqqqg7tvyYe541vlqa0NSxis9XCLf1o1c0D8UPsow7FZXKrNc8M3gDeEJH7VHVZhvc/HxilqhUicgRuT6Vx6RYUkXOAcwBGjhyZ4TCM6TwFgQBN3V3vH265trBPhHgHppHxiDCsMHWIkADptljoj9K6RwEOeAa2OyaTe625TfSsiDwD3CoizzR8dWTnqlqmqhXJ318A/CIyoIll71TV6ao6vaSkpCO7NSangj4fx0zYvtHgrTyfj3N3brk4zf9Nb33/eaHxP/KA18tpO6aOM9hjROMvWB5xOGXs5w1afWm26AXPIPDv2Oq4TNfTmttENwA34vb4qQb+nnxVAJ92ZOciMliSXRBEZEYyng0d2aYx3cGv9z2AQ8ZuS8DrpSAQIOTzcda0XTh+Yst3Si/ZfU8mlTT+Fn7CpB0Y1aeYPJ+PsN/PkIJC7jzyaKYMHkLQ6yPf76coGOSGgw9j+wbr33v0t5kwIPV72DbFA7h4twNA8tyHxgQgsDf0uQGkONkWBN9EpN/91puom2tLb6JZqrpPS20NPn8Y2A8YAKwFrgL8AKp6h4hcCJyP+4SqGrhUVVus6G29iUxPsaGqinWVFYzsU9zmKaC3VFVx7exZDAzn85M99gbch9NLNm/CUWV
s3361J+iV5WWURSJs27dfszUNVpRt4cM1q9lx0GBGJEtbqtZAfAl4BiDekmRbHOKL3a6l3mHtOHKTCx0adFZvIwuAmar6dfL9GOAFVZ2YsUhbyZKB6So+WL2KFxd9RcDr5VvjJ7bYLbQ9ovE4t7z/Dq8tXcLA/Hwu32MfJjRxq/SL9aU8++UXJByHI8aNZ8qgwcxbtZI/vTubzTXVzNxuPGdP24XySIQnv/icFWVbmD5kGIeM3TajhW8yQTUKNS+h0Q/ANxLJOxrxFOc6rG4tU8ngMOBO4Otk02jgXFX9byaCbAtLBibXVJUrX/8fTy74jJp4HI8Ifq+Xn+6xN2dMbXLoTZuV1dSw5713UhmLpbT/cu/9OHOnnVPa7pj7Pre8/447OZ0qQZ+Pbfv245PSdSnLFQWDJByHhCo18Thhv59hhUU8/p2TKKw39iCX1ClDN3wXEmuAKiAE4kP6PeROTmfaJSP1DFT1RdyePhcnX+NzkQiM6Qrmr1nFkws+pzru1hXYemK9/u1ZrKusyNh+fvryi40SAcC1b75O3Kmbw3/5li3c/N5sauJxHFUc3O6iDRMBQFkkQmUsVjvOoSoWY9mWzdwx7/2Mxd1RWvFXSCzHTQQANaAV6Oaf5DKsHq01vYkOSP48DpgJjE2+ZtabxM6YXuXFRV9RE298kvaI8NqSr9Os0T5vLk/fm1uB/y76svb9q0sXd2g/0USCZxZ+0aFtZFTN80Djvy+JZWgiB0V0eoHWTDO4L/AqcFSazxTo9FlLjck1n8eDiNDwNqskbxdlSnPF5oP1Zij1ebwdLkzv93SlwodNnZoUrJRlVrT4X19Vr0r+PCPNq9NnLDWmKzh6/EQCaU76jqMcOGZsxvZzxLjxadt9Hg8HjB5T+/6QsdvitGEgWsO0EfL5OGHylPaEmB3h44GGzy884N8B8bRuMj/TNm0pe7lYRB4SkfNEZPtsBmVMVzdhQAk/2nUPgl5vbb/+kM/Hnw49gj6hUMb287sDDmZIQWFKmwC3Hn4knnrf5EvC+Vx/0GEEvV7CPj95Ph9Br5cj0ySTbYr7UpKfT74/kIzfz4xhwzP64LujJP9sCEwD8qidHM8zECm+Mdeh9Vht6U0UBHYF9gb2BCYAH6nqsdkLLz3rTWS6itXl5by29Gv8Xi8HbzM27dTQmfDCV1/y/FcLGVpQyEUzdqOoiYSzsbqKV75eTEKVA8dsw8D8AtZXVXHLe7PZWF3NdydNZp9RY4glEry+dAmrK8rZcdBgdmxnGcxsUlV3vqPYx+AdBsF9EclNAZ2eokP1DOpJ4D7RSQAO7iCyxl0VjOlmFm5Yz7LNm9iu/4BW1xPYakhhId/bIXUahk3V1fzh7VlUx+NcvOvubNO3H3HH4ZFPP2ZNRTnfGj+R7ZK1ip//8gs+K13HfqO2Ycbw4QDMXv4Nb32zlB0GDebwbd06vrsMHYbf42FAOFzb/bMsEmHOqhW13+x9Hg/5/gBDCgpJqFIYcJcbEA7z2/0PSonR7/Vy8Nht2/7H6kQiAoGp7stkXVuuDKqAT4CbgFdUNWfTRtiVgcmEimiUs559ik/WrsHr8RBLOOw7ajS3HH5k2ucBrXHdW29w5/zU/zcnDSjhiw3rSdT7tzZ14GAWblyfMlvokPwCEqqsq6qsbSvwBzh6wkQe+/xTAl4vqkpJfj4nTNqBm999B5/XvVUU8Hq5ZNc9+OPsN2snnHNUufHgwzl027RzP5peKFODzo4G9gJm4E5mPhuYpar/y1SgrWXJwGTCT176D899uZCok6htC/l8nLXTdC7dvdkifmmtKtvCXvfdlckQ0/LgduNrzb/ckM/H/049kyGFhS0vbHq8TA06+7eq/hQ4F3gBOB14LiMRGtPJEo7Dc1+lJgJw5/r/56ftm5f/l691zvcih9YlAnCvDp75ckE2wzE9RFt6Ez0hIouBm4F84PuA9fEy3VLccVJG8NZXnWbEb2tsiVR3JKSsiCYSlEeiLS9oer2
2jDK5DthOVQ9V1WtU9Q1VrS3XJCIHZz48Y7Ij6PMxcUDjyd48Iuw5clS7tnnm1LRX3zmV5/Oz7+jRuQ7DdANtuU00R1UTzSzyhwzEY0yn+d2BhxD2+/F73IfFQa+XwkCAK/bar13bm7ndeAaGw43a040L9rZhtHD/UB55Pl/tekGvl+369ycvOQJZcAvj7DhoUG0bQNjv54AxY5g+xKaYNi1r9QPkFjck8oGq7tTykh1nD5BNpqwsL+MfH33Iwg2lTB08hJN3mMqANCf0tvjF//7L0wu/wHEc9hgxitsOP5KXlyzm5vdmUxaJsMeIkVy1zwF8uXE91775OqvLK5hYUsJv9zuIhONw5ev/Y+GG9YzoU8Sv9t6fiSUDefzzT3l96RKGFBZy6pSpbNuvP89/tZDnv1xIfsDPiZOmMGPYcN5YtpQnFnxK3HE4ZsL2HLzNth2epsL0HBnpTdSKncxX1U4ZwmjJwHQlZZEIXpGU4jSV0SgJVYpamBI6lkhQHo1QHMpr9qStqmyuqSHs9xP0tX3gVU3cnaW0TzBkFcl6sUwNOjPG1LNww3oue/lFFqwvRYAZw4bz8z335Q+z3+TdFd+gwPj+A/jjwYcxocHziYTjcNM7b3P/Rx8QV4cCf4DL99yb70zaodF+Xlv6Nb967RVKK6vwiDsv0q/3O4BQvVtCTSmPRPjFqy/x0uJFAAwv6sPvDzyEGcOGZ+JPYHqQTF4ZPKmqnTKltV0ZmFzbXFPNfvffTXkkUtvN0wN4xIOiKQPMCgNBXj/tB/TNq5uq4g9vz+KBjz5IGXQW8vn486FHcMjYukFiH61dw0lP/Ku29gC4zzYOHDOWvxyRbiLhVCc8/ggfrV3jFrxJyvP5eO5732dMG0dbm+6vQ+MMROS45l5bl+usRGBMV/DUF58TTSRS+vs7QFydlEQAEHMSPLHgs9r30USiUSIAd4zDze+9k9J2x9z3iDRYLpJI8L8liymtrKQ5izZu4JN1a1MSAbi3pu77cH4LR2h6m9bcJmru64fVMzC90tebNqV8W29OTTzO15s21r4vi0QaJYytVpWXpbxfsmlT2gFmAa+X1RXllOTnN7nf5WVb8KWpURBXZfHGjWnWML1Zi8lAVc/ojECM6U52GjyEpxZ8TlWaamcNhX1+dhoytPZ931CIsN/f6Bs7wMQBA1P3M2QoizdtbJQ8oolEi5Pqje8/gFiafQS9XnYeOjTNGqY3a1NpIxGZKSKXiciVW1/ZCsyYrmzmuPH0D4dTqoMFPB4KA0GC3rrvWH6Ph755eRy1XV1dAa/Hw2V77F07dmCrPJ+Pn+6xV0rb+dNnEPL5U8Yq5Pl8nD51Wos9lYYWFnHkdhNS9uNByPP7OXVKp/QCN91IWyaquwMIA/sDdwHHA++r6g+yF1569gDZdAUbq6u48Z23eXHRV/g8Hr67/WTOmrYzd38wj0c//5RYIsGhY8fx4933on+asQsvLvqKm9+bzeryciaWlHDZHnunXEFstWjjBv7w9pvMWbWCvqE8zp42nZMmT2lVF9GE43Dvh/O5/6MPKI9G2HfUGH66x14ML+qTkb+B6V4yNWvpx6o6pd7PAuBJVT0kk8G2hiUDY4xpu0yNM9g6C1eViAwFNgBjmlnemFaZv3oVf3r3bRZt3Mi4/v25ZNc9mJbmG3IurS4v5+b3ZjPrm6UUB0OcNW06B40Zyx3z3ue5Lxfi83g4YfIUzpw6DX87ayEYk0ttuTL4FXArcCBwG25PortU9VfZCy89uzLoOd5evoyzn306pWdOyOfj70cdw54j2jdhXKaVVlVy+IP3syVSU/sg160x7KMqHqt9EBzy+dht2AjuOdp6WZuuKSP1DIDrVXWzqj4BjMKtgXxNJgI0vddv33itURfNmnica2a9npuA0rjvw/lUxKIpPXqq43E2R2pSegTVxOO8t3I5n6xbm4swjemQtiSD2tEwqhpR1S3124xpj0Ub01dP/aqJ9lx4d8XytN1A01H
gozWrsxuQMVnQ4jMDERkMDAPyRGQn6mbkLcLtXWRMu/UN5bGxpnFRmOJQKAfRpDeyTx8+WrsGpxW3VL3isRKTpltqzZXBocANwHDgJuDG5OtHwC+yF5rpDc7eeZe0/e3P3XmXHEXU2Fk7TSfQ4KGw3+NpNMuoR4SCQIB9R1m/CtP9tJgMVPV+Vd0fOF1V96/3OlpVbSoK0yFnT5vOGVOnkefzEfb7yfP5OGPqzpy1U9epGjZp4CBuOWwmA8JhQj4fAa+XvUeO5oFjjmd0n2KCXi8Br5cdBg7i0eNPTDsFhDFdXVt6Ew0GrgWGqurhIrI9sLuq3p3NANOx3kQ9T008xrrKSgbm57dqauZccFRZVV5GYSBIn+RtLFVlTUUFPq+HknDT8wQZ0xVkapzBvcnXFcn3XwL/Ajo9GZieJ+TzM7JPcZvXcxyHez6czyOffoyIcNqOO3HKlKlpl31/xQr+MHsWpZWV7D1yND/ba28Kg42fTVRGozz9xefMWbWSbfr248TJOzAwvwCPSKORuyLS6BnBR2vX8MTnn1ITjzNz3Hj2GTWapVs2869PP2ZdVSX7j96Gw8aOs/EIpktpy5XBHFXdpX55SxH5UFWnNrPOPcCRwDpVnZzmcwFuBo4AqnBvRbU4t65dGZitDv7HvSzelDoD546DBvPUCSentN02511ufOftlLaA18ubp59FSX5Bbdv6qiqOfuRBNtdUUx2PE/B68Xs8PHjcd9lx0OAW47l9znv8Zc67RBIJHFXCfj8TBwzgs9JS4o5D3HEI+/1s268///r2Ce2qWmZMe2VqnEGliPTH7T2HiOwGbGlhnfuAw5r5/HBgXPJ1DnB7G+IxvdyTCz5rlAjA/Wb+2pLFte+j8Tg3NUgE4M78eel//5PS9qd336a0qrK21kA0kaAyFuOnL/2n0foNrS4v55b336E6Hq/teVQVizFv9Wpq4nHijlPb9uWG9Tz86cetP1hjsqwtyeBS4BlgGxF5G3gAuKi5FVR1FtDcxOlHAw+o612gWESGtCEm04s99tmnTX72z3on2leWfJ22JgDAnNUrU96/tHhR7Um7vmVbtrChqqrZeGZ9sxRvKx8e18TjPPfVwlYta0xnaMs16ufAU7i3c8qBp3GfG3TEMGB5vfcrkm2NRu2IyDm4Vw+MHDmyg7s1PUHY3/SD5rC/rjh9n2amem7Y8yfU5G0bbdS9tKE8n4+2lJpvLn5jOltbrgwewJ2C4ne4cxSNA/7Rwf2n+7eT9kucqt6pqtNVdXpJSUm6RUwvc8GM3Zr87KJd6j7bc+SoJk/kR283MeX9SZOnNEoIPhF2Gz6CwhbqBxwwZmyTVyAN/0fP8/k5eYcdm92eMZ2pLclgvKqepaqvJV/nANt1cP8rgBH13g8HVnVwm6aXmDZkaNoT6nk778K2/funtN191LGNBomNKe7L1fsfmNJ29rTp7DliFKHkuId8v59RxX254ZDDW4ynIBDgjplHk+/3UxAIkO/3E/R6uWjGbvTPC5PvDxBOtp04eQcO2Wbbdhy1MdnRlt5E9wF3JO/tIyK7Aqep6v+1sN5o4LkmehPNBC7E7U20K3CLqs5oKRbrTWTqW1G2hXs+mIdHhLOn7cKggoK0y0Xjce76YB4ryrZw5HYT2GNE07cbF25Yz2fr1jK8qA+7DB3WqkIyW1XHYryxbCmRRJy9R46iX16YuOPw1jfL2FRdzYxhwxlWVNTm4zSmozJV3GYBMB74Jtk0ElgAOICq6pQ06zwM7AcMANYCVwF+3BXuSHYt/Qtuj6Mq4AxVbfEsb8nAGGPaLlODzprrIpqWqp7UwucKXNDW7RpjjMmsVicDVV2WzUCMMcbkjs2oZYwxxpKBMcYYSwbGGGOwZGCMMQZLBsYYY7BkYIwxBksGxhhjsGRgjDEGSwbGGGOwZGCMMQZLBsYYY7BkYIwxBksGxhhjsGRgjDEGSwbGGGOwZGCMMQZLBsYYY7BkYIwxBksGxhhjsGRgjDE
GSwbGGGOwZGCMMQbw5TqA3mDJJ8v4920vUrpiAzOOmMYhp+1HXn4o12EZY0wtSwZZ9sZj7/DHM/5CLBLHSTh89PpnPHXzC9w25zryi8K5Ds8YYwC7TZRVsWiMP51zB5GqKE7CASBSFaV0+XqeuuWFHEdnjDF1LBlk0ZJPvkEdbdQerYnx5hPv5iAiY4xJz5JBFoWLwiQSibSfFRTnd3I0xhjTNEsGWTR83BCGbTsEj0dS2kP5QY794RE5isoYYxqzZJBlv/335QzeZhB5BSHCRXn4g36OufBw9jxmRq5DM8aYWtabKMsGjSrhvoW3sODdL9m0dgsTdxtHv8F9cx2WMcaksGTQCUSE7Xcfn9KWSCSY99LHrFq8hrE7jmbyXhMQkSa2YIwx2ZX1ZCAihwE3A17gLlW9rsHn+wH/BpYkm55U1d9mO65c2rhmEz/a+1dsWreFRCyBx+dl9KQRXP/KlTYYzRiTE1l9ZiAiXuA24HBge+AkEdk+zaJvqurU5KtHJwKAG8+6nbXLSqkuryFaE6OmoobFHy7l/isfyXVoxpheKtsPkGcAi1T1a1WNAo8AR2d5n11aNBJj3ssfk4g7Ke2xSIyXH5iVo6iMMb1dtpPBMGB5vfcrkm0N7S4iH4nIf0RkUroNicg5IjJXROaWlpZmI9ZOoY6TdiAaQCKefkyCMcZkW7aTQbonog3PhPOBUaq6I3Ar8HS6Danqnao6XVWnl5SUZDbKThTMCzJx13GNHhZ7fV72PNa6mxpjciPbyWAFMKLe++HAqvoLqGqZqlYkf38B8IvIgCzHlVM/vvt8CvrmEwoHAQgVhOg/rC9nXXdKjiMzxvRW2e5NNAcYJyJjgJXAicD36i8gIoOBtaqqIjIDN0FtyHJcOTVi/DD+sfgv/O+ht/jmi5WMnz6Wfb+7O4FQINehGWN6qawmA1WNi8iFwH9xu5beo6qfich5yc/vAI4HzheROFANnKiq6W+q51A8FuebBSspKA4zcGTzt6le/eebbC7dwuHnHExenvvtf/2qjWwpLWPEhGEEgn7y++Sz3wl7sG75eoaOHdxsIohFYyz/YhVF/QsYMKx/Ro/LGGMApAued1s0ffp0nTt3bqftb9bj7/Cnc/5GIpEgEUswdqcxXPX4T+g/JHUk8av/fJPfn3pLylORA07ei81ry/jkrQX4/W7uPfv6U/j8nS9549HZ+AI+ErEEx158BGde+71GzxJeeuB1bvvhPagqiViCibttx68evZQ+A4qyftzGmJ5FROap6vS0n1kyaN6iD5ZwyV6/JFIdrW3z+jyMnDicv314Q+3Ju7o6wrfy09/z93gFJ6H11vfi8QixaLy2LRgOcvYfTuboCw6vbfv07S/42aFXE6mqt2+/l/HTx3Lz29dm7BiNMb1Dc8nAJqprwVO3vEAsEktpS8QdVn+9lsUfLa1t++uFdze5jfqJwF0/kZIIACJVER674dmUtsdvepZovSQEkIglWPzhUlYuWt2WwzDGmGZZMmjBum/W46QZF+D1edi4enPt+1Vfr+3wvrZsKE95X7p8A+ku3Lx+L5vWbG78gTHGtJMlgxZMP3RHAnmNH+7GInG2m75N7ftDz9i/w/uauOu41H0fsiP+oL/RcolYgjFTRnV4f8YYs5UlgxYcee7BFJcU4Q/UdbwK5Qf59o+OpLikT23bId/fj3BRXtpt+EN1J3Sf30t+nzCBkJ+tz4o9Xg+h/BDn/PHUlPWOvfgICvrm42uw71OuPJ78onAmDs8YYwB7gNwqZRvKeezGZ3j76TkU9ivguItnss/xuzXq+RONRvnZIdfy6VsLUFUGjyrhdy/+klWL1vDYDc+wYfUmph+yIyf+7FhKl6/nn797kuVfrGS7Xbbl5Cu+zcgJjWfq2LR2M/+6/t+8/58PKB5YxPGXHsUe39qlsw7dGNODWG8iY4wxzSYDK27TCu+9MJ/rT7uVsg0ViMCkPSdw0i+O5bfH30SkKgJAfp8wf3jpV7z
6z7d46f7XiUViTD9sKuffdDqDRjUepPbKg7N44Nf/Yv3KjQzfbihnX38quxw6tZOPzBhjXHZl0IIv5y7ighk/b/XygZCfaI3bFdXjEQr7F3LfwlsoKM6vXeb5v7/M7T+6vzaRAATzAvzm6cvY+eAdMxe8McbUY+MMOuDWi5oeP5DO1kQA4DhKTWUNL93/em2bqnLvFQ+nJAKASHWUu37+UIdiNcaY9rJk0ILlC1e1vFAzIlVRFs5ZVPu+uqKGis1VaZdd0cF9GWNMe1kyaMGgUQM7tH4gL8CYHerGBITyg+QVpK9zPHh0x/ZljDHtZcmgBRfcfHqblq8/JkAEAkE/h//ggNo2j8fDSb84jmCylsFWwXCAM645qUOxGmNMe1kyaMGUfSbx0/suJBiuG4U8YsIwfvT38/D6vLVt/qCPq5+9nH2O3x2f34d4hEl7TODPb13daIbR7/z4KM645kSK+hcgIgwY3p9L/34eexxt4weMMblhvYnaoKaqBl/Ah89X9+2/uqIavJ7augUATrLOcf1kkY6qEo/F8QcaTzlhjDGZZuMMgC/nLebxm55j7dJ1TDt4CsdceHjamgBLP1vOn8/9G19/vIw+JUWc9fuTGTFhKJcfeg2b125BPMKMI3biwlvO4PTxl5CIuUXsA3kBHl59O+ftcBmly91CbYGQn7/OvY7bL32AeS99BLhTU/zmyZ8waPQgHr/pWZZ9tpyJu43j2z86ipLhjQvXVJZV8eztL/Huc3PpN7gvx118BJP3mpjFv5QxpjfqFVcGsx5/h+tP+wvRSAx1lEDIT35xPnfMv55+g+sK1HwxZxE/3O3naWcKzTRfwIeTcHASDr6Al2BekFvf/R0jxtdNSVG5pZLzpl3GxtWbiNbE3GcQeUHOu+k0jjzn4OwHaYzpUXr1OINEPMHN599JpDqKJqeijtbEKN9QzsO/fypl2etOuaVTEgFAPBrHSTjJ3xNUlVXzt5/+I2WZf9/2Ym0iAFB16x787cf3U11Z0zmBGmN6hR6fDFYuWkM0Em/UHo8leO/5+Sltqxev6aywGlFVPn79s5S22f+emzKIbSuP18PiD5d2UmTGmN6gxyeDguJw7X39hooGFKa89/pz+wgl3Cd1WurigenrHCfiCQr7FXRGSMaYXqLHJ4N+g/syac/x+PypPXtC+UG+c+lRKW0HfG+vzgssdfZrguEAx150eErbcRfPbDQeweP1MGzcEEZNHJ7tCI0xvUiPTwYAv/rXpWw7bRuCeQHy+4TxB/0cd8lM9vnO7inLXfr38xi38zYpbeGiPPoNK260zUC4cfWzQF7jLqLBdMuF/Mw4fCf3QXYyngNO2ovjf5yanKYdNIXTrz6BQDLuUH6QkROHc82zrZ84zxhjWqNX9Cba6psvVrJh1UbGTh1NUb/CJpdbvnAl7z43jzGTRzI9Oa301x8v5d5f/YvBo0s498bv4/P5WPrFcn5+yDX4/F5ufPtaBg7uS0VFBVfO/AMVm6u4/B8XMXbKaABuOvsOlnz2Dd+/8nh2OWwaAKuXrGX11+sYtf1w+g/p20Q0bvfSr+Z9TZ8BhSlTWxhjTFtYcZsmxGNx3nlmLssXrmLU9sPZ7cidWxwoVt+Kr1bzzjNz8Xo97HXcDAaOLGHW47O5/Uf3E4vEOPTM/Tn7ulNb3pAxxnQCSwZpbFyziR/ufgVlG8uJVEYI5gfpO6iYW2Zfm3YwWkMPX/cUD/72MRxHERFEoM/AIkq/2ZCyXDAU4Lkqm5raGJN7vXqcQVNuueAu1q/cSHV5DY6jVJfXsG5ZKbdfen+L6y79bDkPXf040ZoY8WicWCRGtCbWKBEARGqiXP3dG7NxCMYYkzG9MhmoKu8+O49EPLXLaTyW4K0n3m1x/VmPv0M81njsQlNmPzOnzTEaY0xn6pXJwJX+9lhrbpqpattGKne/O3HGmF6mVyYDEWHGEdPweFMP3+vzstexM1pcf59v74Y/0PoBarvO3LnNMRpjTGf
qlckA4Id/PZv+Q/vWVh3LKwxRMqI/5910eovrjtlhFCdcfgyBvABenxdfwEcg5Kff0MbdQ/1BH7989EeZDt8YYzKq1/YmAohGYsx++n2Wf7GKUZOGs8fRu+Brw5QUyxas4J1/z8Hj9bD38bsxZMwgXn7gdf5++YPEonEOOmUfLrj5zA7HaYwxmWBdS40xxuS2a6mIHCYiC0VkkYj8LM3nIiK3JD//WESmZTsmY4wxqbKaDETEC9wGHA5sD5wkIts3WOxwYFzydQ5wezZjMsYY01i2rwxmAItU9WtVjQKPAEc3WOZo4AF1vQsUi8iQLMdljDGmnmwng2HA8nrvVyTb2roMInKOiMwVkbmlpaUZD9QYY3qzbCcDSdPW8Il1a5ZBVe9U1emqOr2kpCQjwRljjHFlu7TXCmBEvffDgVXtWCbFvHnz1ovIsnbGNABY3851u6KedDw96VjAjqcr60nHAq0/nibnwM92MpgDjBORMcBK4ETgew2WeQa4UEQeAXYFtqjq6uY2qqrtvjQQkblNda3qjnrS8fSkYwE7nq6sJx0LZOZ4spoMVDUuIhcC/wW8wD2q+pmInJf8/A7gBeAIYBFQBZyRzZiMMcY0lvUK8Kr6Au4Jv37bHfV+V+CCbMdhjDGmab1xbqI7cx1AhvWk4+lJxwJ2PF1ZTzoWyMDxdMvpKIwxxmRWb7wyMMYY04AlA2OMMb0nGYjIPSKyTkQ+zXUsHSUiI0TkNRFZICKficjFuY6pI0QkJCLvi8hHyeP5Ta5j6igR8YrIByLyXK5j6SgRWSoin4jIhyLS7acLFpFiEXlcRL5I/hvaPdcxtYeIjE/+N9n6KhORS9q9vd7yzEBE9gEqcOdBmpzreDoiOXfTEFWdLyKFwDzgGFX9PMehtYuICJCvqhUi4gfeAi5OzlXVLYnIpcB0oEhVj8x1PB0hIkuB6araIwZpicj9wJuqepeIBICwqm7OcVgdkpwUdCWwq6q2a0Bur7kyUNVZwMZcx5EJqrpaVecnfy8HFpBmPqfuIjlJYUXyrT/56rbfUkRkODATuCvXsZhUIlIE7APcDaCq0e6eCJIOBBa3NxFAL0oGPZWIjAZ2At7LcSgdkryt8iGwDnhZVbvz8fwZuAxwchxHpijwkojME5Fzch1MB20DlAL3Jm/j3SUi+bkOKgNOBB7uyAYsGXRjIlIAPAFcoqpluY6nI1Q1oapTceemmiEi3fJWnogcCaxT1Xm5jiWD9lTVabi1Ry5I3nLtrnzANOB2Vd0JqAQaFd3qTpK3ur4FPNaR7Vgy6KaS99afAB5S1SdzHU+mJC/ZXwcOy20k7bYn8K3kffZHgANE5MHchtQxqroq+XMd8BRunZLuagWwot6V5+O4yaE7OxyYr6prO7IRSwbdUPKB693AAlW9KdfxdJSIlIhIcfL3POAg4IucBtVOqvpzVR2uqqNxL91fVdVTchxWu4lIfrKTAsnbKYcA3bZHnqquAZaLyPhk04FAt+x4Uc9JdPAWEXTC3ERdhYg8DOwHDBCRFcBVqnp3bqNqtz2BU4FPkvfZAX6RnAeqOxoC3J/sEeEBHlXVbt8ls4cYBDzlfv/AB/xTVV/MbUgddhHwUPL2ytd048kxRSQMHAyc2+Ft9ZaupcYYY5pmt4mMMcZYMjDGGGPJwBhjDJYMjDHGYMnAGGMMlgyMMcZgycAYAETkdBEZ2orl7hOR45v5/HURmZ7h2IpF5P/qvd+vJ0yNbboWSwbGuE4HWkwGOVIM/F9LCxnTEZYMTI8kIqOTxUvuF5GPk8VMwiKys4i8kZyB878iMiT5TX867qjUD0UkT0SuFJE5IvKpiNyZnAKkrTEcIiLviMh8EXksObHg1mIxv0m2fyIiE5LtJSLycrL9byKyTEQGANcBY5Ox/TG5+YJ6BVoeak98xtRnycD0ZOOBO1V1ClAGXADcChyvqjsD9wDXqurjwFzgZFWdqqrVwF9UdZdkIaQ8oE0FapIn8V8CByVn/Jw
LXFpvkfXJ9tuBnyTbrsKdy2ga7oRwI5PtP8Odq36qqv402bYTcAmwPe60zHu2JT5jGuo1cxOZXmm5qr6d/P1B4BfAZODl5BdpL7C6iXX3F5HLgDDQD/gMeLYN+94N90T9dnJfAeCdep9vnWl2HnBc8ve9gGMBVPVFEdnUzPbfV9UVAMn5qUbjVogzpl0sGZierOHEW+XAZ6rabM1bEQkBf8Ut9bhcRH4NhNq4b8Et0nNSE59Hkj8T1P07bMutnki93+tvw5h2sdtEpicbWa/Y+UnAu0DJ1jYR8YvIpOTn5UBh8vetJ/71yfv8TfYeasa7wJ4ism1yX2ER2a6Fdd4Cvptc/hCgb5rYjMkKSwamJ1sAnCYiH+Pe6rkV98T+BxH5CPgQ2CO57H3AHclbLhHg78AnwNPAnLbuWFVLcXsoPZzc/7vAhBZW+w1wiIjMxy1YshooV9UNuLebPq33ANmYjLIprE2PlKwN/VzyAXC3ICJBIKGq8eTVy+3JUqDGZJ3dZzSm6xgJPCoiHiAKnJ3jeEwvYlcGxrSDiDwFjGnQfLmq/jcX8RjTUZYMjDHG2ANkY4wxlgyMMcZgycAYYwyWDIwxxgD/D8wvz+ZtB9kPAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "df.scatter(df.petal_length, df.petal_width, c_expr=df.class_);" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Preprocessing \n", "\n", "### Scaling of numerical features\n", "\n", "`vaex.ml` packs the common numerical scalers:\n", "\n", "* `vaex.ml.StandardScaler` - Scale features by removing their mean and dividing by their standard deviation (i.e. scaling them to unit variance);\n", "* `vaex.ml.MinMaxScaler` - Scale features to a given range;\n", "* `vaex.ml.RobustScaler` - Scale features by removing their median and scaling them according to a given percentile range;\n", "* `vaex.ml.MaxAbsScaler` - Scale features by their maximum absolute value.\n", " \n", "The usage is quite similar to that of `scikit-learn`, in the sense that each transformer implements the `.fit` and `.transform` methods." ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2021-04-13T10:15:01.969065Z", "start_time": "2021-04-13T10:15:01.862449Z" } }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
# sepal_length sepal_width petal_length petal_width class_ scaled_petal_length scaled_petal_width scaled_sepal_length scaled_sepal_width
0 5.9 3.0 4.2 1.5 1 0.25096730693923325 0.39617188299171285 0.06866179325140277 -0.12495760117130607
1 6.1 3.0 4.6 1.4 1 0.4784301228962429 0.26469891297233916 0.3109975341387059 -0.12495760117130607
2 6.6 2.9 4.6 1.3 1 0.4784301228962429 0.13322594295296575 0.9168368863569659 -0.3563605663033572
3 6.7 3.3 5.7 2.1 2 1.1039528667780207 1.1850097031079545 1.0380047568006185 0.5692512942248463
4 5.5 4.2 1.4 0.2 0 -1.341272404759837 -1.3129767272601438 -0.4160096885232057 2.6518779804133055
... ... ... ... ... ... ... ... ... ...
1455.2 3.4 1.4 0.2 0 -1.341272404759837 -1.3129767272601438 -0.7795132998541615 0.8006542593568975
1465.1 3.8 1.6 0.2 0 -1.2275409967813318 -1.3129767272601438 -0.9006811702978141 1.726266119885101
1475.8 2.6 4.0 1.2 1 0.13723589896072813 0.0017529729335920385-0.052506077192249874-1.0505694616995096
1485.7 3.8 1.7 0.3 0 -1.1706752927920796 -1.18150375724077 -0.17367394763590144 1.726266119885101
1496.2 2.9 4.3 1.3 1 0.30783301092848553 0.13322594295296575 0.4321654045823586 -0.3563605663033572
" ], "text/plain": [ "# sepal_length sepal_width petal_length petal_width class_ scaled_petal_length scaled_petal_width scaled_sepal_length scaled_sepal_width\n", "0 5.9 3.0 4.2 1.5 1 0.25096730693923325 0.39617188299171285 0.06866179325140277 -0.12495760117130607\n", "1 6.1 3.0 4.6 1.4 1 0.4784301228962429 0.26469891297233916 0.3109975341387059 -0.12495760117130607\n", "2 6.6 2.9 4.6 1.3 1 0.4784301228962429 0.13322594295296575 0.9168368863569659 -0.3563605663033572\n", "3 6.7 3.3 5.7 2.1 2 1.1039528667780207 1.1850097031079545 1.0380047568006185 0.5692512942248463\n", "4 5.5 4.2 1.4 0.2 0 -1.341272404759837 -1.3129767272601438 -0.4160096885232057 2.6518779804133055\n", "... ... ... ... ... ... ... ... ... ...\n", "145 5.2 3.4 1.4 0.2 0 -1.341272404759837 -1.3129767272601438 -0.7795132998541615 0.8006542593568975\n", "146 5.1 3.8 1.6 0.2 0 -1.2275409967813318 -1.3129767272601438 -0.9006811702978141 1.726266119885101\n", "147 5.8 2.6 4.0 1.2 1 0.13723589896072813 0.0017529729335920385 -0.052506077192249874 -1.0505694616995096\n", "148 5.7 3.8 1.7 0.3 0 -1.1706752927920796 -1.18150375724077 -0.17367394763590144 1.726266119885101\n", "149 6.2 2.9 4.3 1.3 1 0.30783301092848553 0.13322594295296575 0.4321654045823586 -0.3563605663033572" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "features = ['petal_length', 'petal_width', 'sepal_length', 'sepal_width']\n", "scaler = vaex.ml.StandardScaler(features=features, prefix='scaled_')\n", "scaler.fit(df)\n", "df_trans = scaler.transform(df)\n", "df_trans" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The output of the `.transform` method of any `vaex.ml` transformer is a _shallow copy_ of a DataFrame that contains the resulting features of the transformations in addition to the original columns. A shallow copy means that this new DataFrame just references the original one, and no extra memory is used. 
In addition, the resulting features, in this case the scaled numerical features, are _virtual columns_, which do not take any memory but are computed on the fly when needed. This approach is ideal for working with very large datasets." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Encoding of categorical features\n", "\n", "`vaex.ml` contains several categorical encoders:\n", "\n", "* `vaex.ml.LabelEncoder` - Encode features with as many integers as categories, starting from 0;\n", "* `vaex.ml.OneHotEncoder` - Encode features according to the one-hot scheme;\n", "* `vaex.ml.MultiHotEncoder` - Encode features according to the multi-hot scheme (binary vector);\n", "* `vaex.ml.FrequencyEncoder` - Encode features by the frequency of their respective categories;\n", "* `vaex.ml.BayesianTargetEncoder` - Encode categories with the mean of their target value;\n", "* `vaex.ml.WeightOfEvidenceEncoder` - Encode categories by their weight of evidence value.\n", " \n", " The following is a quick example using the Titanic dataset." ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "ExecuteTime": { "end_time": "2021-04-13T10:15:03.177265Z", "start_time": "2021-04-13T10:15:03.143397Z" } }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
# pclasssurvived name sex age sibsp parch ticket farecabin embarked boat bodyhome_dest
0 1True Allen, Miss. Elisabeth Walton female29 0 0 24160211.338B5 S 2 nanSt Louis, MO
1 1True Allison, Master. Hudson Trevor male 0.9167 1 2 113781151.55 C22 C26S 11 nanMontreal, PQ / Chesterville, ON
2 1False Allison, Miss. Helen Loraine female 2 1 2 113781151.55 C22 C26S -- nanMontreal, PQ / Chesterville, ON
3 1False Allison, Mr. Hudson Joshua Creighton male 30 1 2 113781151.55 C22 C26S -- 135Montreal, PQ / Chesterville, ON
4 1False Allison, Mrs. Hudson J C (Bessie Waldo Daniels)female25 1 2 113781151.55 C22 C26S -- nanMontreal, PQ / Chesterville, ON
" ], "text/plain": [ " # pclass survived name sex age sibsp parch ticket fare cabin embarked boat body home_dest\n", " 0 1 True Allen, Miss. Elisabeth Walton female 29 0 0 24160 211.338 B5 S 2 nan St Louis, MO\n", " 1 1 True Allison, Master. Hudson Trevor male 0.9167 1 2 113781 151.55 C22 C26 S 11 nan Montreal, PQ / Chesterville, ON\n", " 2 1 False Allison, Miss. Helen Loraine female 2 1 2 113781 151.55 C22 C26 S -- nan Montreal, PQ / Chesterville, ON\n", " 3 1 False Allison, Mr. Hudson Joshua Creighton male 30 1 2 113781 151.55 C22 C26 S -- 135 Montreal, PQ / Chesterville, ON\n", " 4 1 False Allison, Mrs. Hudson J C (Bessie Waldo Daniels) female 25 1 2 113781 151.55 C22 C26 S -- nan Montreal, PQ / Chesterville, ON" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = vaex.datasets.titanic()\n", "df.head(5)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "ExecuteTime": { "end_time": "2021-04-13T10:15:05.014900Z", "start_time": "2021-04-13T10:15:04.289615Z" } }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
# pclasssurvived name sex age sibsp parch ticket farecabin embarked boat bodyhome_dest label_encoded_embarked embarked_missing embarked_C embarked_Q embarked_S embarked_0 embarked_1 embarked_2 frequency_encoded_embarked mean_encoded_embarked woe_encoded_embarked
0 1True Allen, Miss. Elisabeth Walton female29 0 0 24160211.338B5 S 2 nanSt Louis, MO 1 0 0 0 1 1 0 0 0.698243 0.337472 -0.696431
1 1True Allison, Master. Hudson Trevor male 0.9167 1 2 113781151.55 C22 C26S 11 nanMontreal, PQ / Chesterville, ON 1 0 0 0 1 1 0 0 0.698243 0.337472 -0.696431
2 1False Allison, Miss. Helen Loraine female 2 1 2 113781151.55 C22 C26S -- nanMontreal, PQ / Chesterville, ON 1 0 0 0 1 1 0 0 0.698243 0.337472 -0.696431
3 1False Allison, Mr. Hudson Joshua Creighton male 30 1 2 113781151.55 C22 C26S -- 135Montreal, PQ / Chesterville, ON 1 0 0 0 1 1 0 0 0.698243 0.337472 -0.696431
4 1False Allison, Mrs. Hudson J C (Bessie Waldo Daniels)female25 1 2 113781151.55 C22 C26S -- nanMontreal, PQ / Chesterville, ON 1 0 0 0 1 1 0 0 0.698243 0.337472 -0.696431
" ], "text/plain": [ " # pclass survived name sex age sibsp parch ticket fare cabin embarked boat body home_dest label_encoded_embarked embarked_missing embarked_C embarked_Q embarked_S embarked_0 embarked_1 embarked_2 frequency_encoded_embarked mean_encoded_embarked woe_encoded_embarked\n", " 0 1 True Allen, Miss. Elisabeth Walton female 29 0 0 24160 211.338 B5 S 2 nan St Louis, MO 1 0 0 0 1 1 0 0 0.698243 0.337472 -0.696431\n", " 1 1 True Allison, Master. Hudson Trevor male 0.9167 1 2 113781 151.55 C22 C26 S 11 nan Montreal, PQ / Chesterville, ON 1 0 0 0 1 1 0 0 0.698243 0.337472 -0.696431\n", " 2 1 False Allison, Miss. Helen Loraine female 2 1 2 113781 151.55 C22 C26 S -- nan Montreal, PQ / Chesterville, ON 1 0 0 0 1 1 0 0 0.698243 0.337472 -0.696431\n", " 3 1 False Allison, Mr. Hudson Joshua Creighton male 30 1 2 113781 151.55 C22 C26 S -- 135 Montreal, PQ / Chesterville, ON 1 0 0 0 1 1 0 0 0.698243 0.337472 -0.696431\n", " 4 1 False Allison, Mrs. Hudson J C (Bessie Waldo Daniels) female 25 1 2 113781 151.55 C22 C26 S -- nan Montreal, PQ / Chesterville, ON 1 0 0 0 1 1 0 0 0.698243 0.337472 -0.696431" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "label_encoder = vaex.ml.LabelEncoder(features=['embarked'])\n", "one_hot_encoder = vaex.ml.OneHotEncoder(features=['embarked'])\n", "multi_hot_encoder = vaex.ml.MultiHotEncoder(features=['embarked'])\n", "freq_encoder = vaex.ml.FrequencyEncoder(features=['embarked'])\n", "bayes_encoder = vaex.ml.BayesianTargetEncoder(features=['embarked'], target='survived')\n", "woe_encoder = vaex.ml.WeightOfEvidenceEncoder(features=['embarked'], target='survived')\n", "\n", "df = label_encoder.fit_transform(df)\n", "df = one_hot_encoder.fit_transform(df)\n", "df = multi_hot_encoder.fit_transform(df)\n", "df = freq_encoder.fit_transform(df)\n", "df = bayes_encoder.fit_transform(df)\n", "df = woe_encoder.fit_transform(df)\n", "\n", "df.head(5)" ] }, { "cell_type": "markdown", "metadata": { 
"ExecuteTime": { "end_time": "2020-01-02T13:09:43.742926Z", "start_time": "2020-01-02T13:09:43.676031Z" } }, "source": [ "Notice that the transformed features are all included in the resulting DataFrame and are appropriately named. This is excellent for the construction of various diagnostic plots, and engineering of more complex features. The fact that the resulting (encoded) features take no memory, allows one to try out or combine a variety of preprocessing steps without spending any extra memory. " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Feature Engineering\n", "\n", "### KBinsDiscretizer\n", "\n", "With the `KBinsDiscretizer` you can convert a continous into a discrete feature by binning the data into specified intervals. You can specify the number of bins, the strategy on how to determine their size:\n", "\n", "* \"uniform\" - all bins have equal sizes;\n", "* \"quantile\" - all bins have (approximately) the same number of samples in them;\n", "* \"kmeans\" - values in each bin belong to the same 1D cluster as determined by the `KMeans` algorithm." ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "ExecuteTime": { "end_time": "2021-04-13T10:15:07.793286Z", "start_time": "2021-04-13T10:15:07.742886Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/jovan/vaex/packages/vaex-core/vaex/ml/transformations.py:1089: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in age are removed.Consider decreasing the number of bins.\n", " warnings.warn(f'Bins whose width are too small (i.e., <= 1e-8) in {feat} are removed.'\n" ] }, { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
# pclasssurvived name sex age sibsp parch ticket farecabin embarked boat bodyhome_dest label_encoded_embarked embarked_missing embarked_C embarked_Q embarked_S frequency_encoded_embarked mean_encoded_embarked woe_encoded_embarked binned_age
0 1True Allen, Miss. Elisabeth Walton female29 0 0 24160211.338B5 S 2 nanSt Louis, MO 1 0 0 0 1 0.698243 0.337472 -0.696431 0
1 1True Allison, Master. Hudson Trevor male 0.9167 1 2 113781151.55 C22 C26S 11 nanMontreal, PQ / Chesterville, ON 1 0 0 0 1 0.698243 0.337472 -0.696431 0
2 1False Allison, Miss. Helen Loraine female 2 1 2 113781151.55 C22 C26S -- nanMontreal, PQ / Chesterville, ON 1 0 0 0 1 0.698243 0.337472 -0.696431 0
3 1False Allison, Mr. Hudson Joshua Creighton male 30 1 2 113781151.55 C22 C26S -- 135Montreal, PQ / Chesterville, ON 1 0 0 0 1 0.698243 0.337472 -0.696431 0
4 1False Allison, Mrs. Hudson J C (Bessie Waldo Daniels)female25 1 2 113781151.55 C22 C26S -- nanMontreal, PQ / Chesterville, ON 1 0 0 0 1 0.698243 0.337472 -0.696431 0
" ], "text/plain": [ " # pclass survived name sex age sibsp parch ticket fare cabin embarked boat body home_dest label_encoded_embarked embarked_missing embarked_C embarked_Q embarked_S frequency_encoded_embarked mean_encoded_embarked woe_encoded_embarked binned_age\n", " 0 1 True Allen, Miss. Elisabeth Walton female 29 0 0 24160 211.338 B5 S 2 nan St Louis, MO 1 0 0 0 1 0.698243 0.337472 -0.696431 0\n", " 1 1 True Allison, Master. Hudson Trevor male 0.9167 1 2 113781 151.55 C22 C26 S 11 nan Montreal, PQ / Chesterville, ON 1 0 0 0 1 0.698243 0.337472 -0.696431 0\n", " 2 1 False Allison, Miss. Helen Loraine female 2 1 2 113781 151.55 C22 C26 S -- nan Montreal, PQ / Chesterville, ON 1 0 0 0 1 0.698243 0.337472 -0.696431 0\n", " 3 1 False Allison, Mr. Hudson Joshua Creighton male 30 1 2 113781 151.55 C22 C26 S -- 135 Montreal, PQ / Chesterville, ON 1 0 0 0 1 0.698243 0.337472 -0.696431 0\n", " 4 1 False Allison, Mrs. Hudson J C (Bessie Waldo Daniels) female 25 1 2 113781 151.55 C22 C26 S -- nan Montreal, PQ / Chesterville, ON 1 0 0 0 1 0.698243 0.337472 -0.696431 0" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "kbdisc = vaex.ml.KBinsDiscretizer(features=['age'], n_bins=5, strategy='quantile')\n", "df = kbdisc.fit_transform(df)\n", "df.head(5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### GroupBy Transformer\n", "\n", "The `GroupByTransformer` is a handy feature in `vaex-ml` that lets you perform a groupby aggregations on the training data, and then use those aggregations as features in the training and test sets." ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "ExecuteTime": { "end_time": "2021-04-13T10:15:09.682863Z", "start_time": "2021-04-13T10:15:09.591867Z" } }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
# pclasssurvived name sex age sibsp parch ticket farecabin embarked boat bodyhome_dest label_encoded_embarked embarked_missing embarked_C embarked_Q embarked_S frequency_encoded_embarked mean_encoded_embarked woe_encoded_embarked binned_age age_mean age_std fare_mean fare_std
0 1True Allen, Miss. Elisabeth Walton female29 0 0 24160211.338B5 S 2 nanSt Louis, MO 1 0 0 0 1 0.698243 0.337472 -0.696431 0 39.1599 14.5224 87.509 80.3226
1 1True Allison, Master. Hudson Trevor male 0.9167 1 2 113781151.55 C22 C26S 11 nanMontreal, PQ / Chesterville, ON 1 0 0 0 1 0.698243 0.337472 -0.696431 0 39.1599 14.5224 87.509 80.3226
2 1False Allison, Miss. Helen Loraine female 2 1 2 113781151.55 C22 C26S -- nanMontreal, PQ / Chesterville, ON 1 0 0 0 1 0.698243 0.337472 -0.696431 0 39.1599 14.5224 87.509 80.3226
3 1False Allison, Mr. Hudson Joshua Creighton male 30 1 2 113781151.55 C22 C26S -- 135Montreal, PQ / Chesterville, ON 1 0 0 0 1 0.698243 0.337472 -0.696431 0 39.1599 14.5224 87.509 80.3226
4 1False Allison, Mrs. Hudson J C (Bessie Waldo Daniels)female25 1 2 113781151.55 C22 C26S -- nanMontreal, PQ / Chesterville, ON 1 0 0 0 1 0.698243 0.337472 -0.696431 0 39.1599 14.5224 87.509 80.3226
" ], "text/plain": [ " # pclass survived name sex age sibsp parch ticket fare cabin embarked boat body home_dest label_encoded_embarked embarked_missing embarked_C embarked_Q embarked_S frequency_encoded_embarked mean_encoded_embarked woe_encoded_embarked binned_age age_mean age_std fare_mean fare_std\n", " 0 1 True Allen, Miss. Elisabeth Walton female 29 0 0 24160 211.338 B5 S 2 nan St Louis, MO 1 0 0 0 1 0.698243 0.337472 -0.696431 0 39.1599 14.5224 87.509 80.3226\n", " 1 1 True Allison, Master. Hudson Trevor male 0.9167 1 2 113781 151.55 C22 C26 S 11 nan Montreal, PQ / Chesterville, ON 1 0 0 0 1 0.698243 0.337472 -0.696431 0 39.1599 14.5224 87.509 80.3226\n", " 2 1 False Allison, Miss. Helen Loraine female 2 1 2 113781 151.55 C22 C26 S -- nan Montreal, PQ / Chesterville, ON 1 0 0 0 1 0.698243 0.337472 -0.696431 0 39.1599 14.5224 87.509 80.3226\n", " 3 1 False Allison, Mr. Hudson Joshua Creighton male 30 1 2 113781 151.55 C22 C26 S -- 135 Montreal, PQ / Chesterville, ON 1 0 0 0 1 0.698243 0.337472 -0.696431 0 39.1599 14.5224 87.509 80.3226\n", " 4 1 False Allison, Mrs. Hudson J C (Bessie Waldo Daniels) female 25 1 2 113781 151.55 C22 C26 S -- nan Montreal, PQ / Chesterville, ON 1 0 0 0 1 0.698243 0.337472 -0.696431 0 39.1599 14.5224 87.509 80.3226" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gbt = vaex.ml.GroupByTransformer(by='pclass', agg={'age': ['mean', 'std'],\n", " 'fare': ['mean', 'std'],\n", " })\n", "df = gbt.fit_transform(df)\n", "df.head(5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### CycleTransformer\n", "\n", "The `CycleTransformer` provides a strategy for transforming cyclical features, such as angles or time. This is done by considering each feature to be describing a polar coordinate system, and converting it to Cartesian coorindate system. \n", "This is shown to help certain ML models to achieve better performance." 
] }, { "cell_type": "code", "execution_count": 9, "metadata": { "ExecuteTime": { "end_time": "2021-04-13T10:55:09.248159Z", "start_time": "2021-04-13T10:55:09.225352Z" } }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
# days days_x days_y
0 0 1 0
1 1 0.62349 0.781831
2 2-0.222521 0.974928
3 3-0.900969 0.433884
4 4-0.900969-0.433884
5 5-0.222521-0.974928
6 6 0.62349 -0.781831
" ], "text/plain": [ " # days days_x days_y\n", " 0 0 1 0\n", " 1 1 0.62349 0.781831\n", " 2 2 -0.222521 0.974928\n", " 3 3 -0.900969 0.433884\n", " 4 4 -0.900969 -0.433884\n", " 5 5 -0.222521 -0.974928\n", " 6 6 0.62349 -0.781831" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = vaex.from_arrays(days=[0, 1, 2, 3, 4, 5, 6])\n", "cyctrans = vaex.ml.CycleTransformer(n=7, features=['days'])\n", "cyctrans.fit_transform(df)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Dimensionality reduction \n", "\n", "### Principal Component Analysis\n", "\n", "The [PCA](https://en.wikipedia.org/wiki/Principal_component_analysis) implemented in `vaex.ml` can scale to a very large number of samples, even if that data we want to transform does not fit into RAM. To demonstrate this, let us do a PCA transformation on the Iris dataset. For this example, we have replicated this dataset thousands of times, such that it contains over **1 billion** samples." 
] }, { "cell_type": "code", "execution_count": 10, "metadata": { "ExecuteTime": { "end_time": "2021-04-13T10:27:00.511609Z", "start_time": "2021-04-13T10:15:10.667961Z" }, "tags": [ "skip-ci" ] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of samples in DataFrame: 1,005,000,000\n" ] } ], "source": [ "df = vaex.datasets.iris_1e9()\n", "n_samples = len(df)\n", "print(f'Number of samples in DataFrame: {n_samples:,}')" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "ExecuteTime": { "end_time": "2021-04-13T10:31:54.111826Z", "start_time": "2021-04-13T10:27:00.539429Z" }, "tags": [ "skip-ci" ] }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "92d9ff2d39464ba1acdf6bf812e079e5", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, max=1.0), Label(value='In progress...')))" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "e17a5474ac84415cbccc65d9c14d05ad", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, max=1.0), Label(value='In progress...')))" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "f037bdb78f6a43818da3be78bb89a45f", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, max=1.0), Label(value='In progress...')))" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "features = ['petal_length', 'petal_width', 'sepal_length', 'sepal_width']\n", "pca = vaex.ml.PCA(features=features, n_components=4)\n", "pca.fit(df, progress='widget')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The PCA transformer implemented in `vaex.ml` can be fit in well under a minute, even when the data comprises 4 columns and 1 billion rows. 
" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "ExecuteTime": { "end_time": "2021-04-13T10:33:28.471868Z", "start_time": "2021-04-13T10:33:28.433622Z" }, "tags": [ "skip-ci" ] }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
# sepal_length sepal_width petal_length petal_width class_ PCA_0 PCA_1 PCA_2 PCA_3
0 5.9 3.0 4.2 1.5 1 -0.51109806050657190.10228410590320294 0.13232789125239366 -0.05010053260756789
1 6.1 3.0 4.6 1.4 1 -0.89016044564845710.03381244269907491 -0.0097680289049917950.1534482059864868
2 6.6 2.9 4.6 1.3 1 -1.0432977809309918-0.2289569106597803 -0.41481456509035997 0.03752354509774891
3 6.7 3.3 5.7 2.1 2 -2.275853649246034 -0.3333865237191275 0.28467815436304544 0.062230281630705805
4 5.5 4.2 1.4 0.2 0 2.5971594768136956 -1.1000219282272325 0.16358191524058419 0.09895807321522321
... ... ... ... ... ... ... ... ... ...
1,004,999,9955.2 3.4 1.4 0.2 0 2.6398212682948925 -0.3192900674870881 -0.1392533720548284 -0.06514104909063131
1,004,999,9965.1 3.8 1.6 0.2 0 2.537573370908207 -0.5103675457748862 0.17191840236558648 0.19216594960009262
1,004,999,9975.8 2.6 4.0 1.2 1 -0.22887904987726520.4022576190683287 -0.22736270650701024 -0.01862045442675292
1,004,999,9985.7 3.8 1.7 0.3 0 2.199077961161723 -0.8792440894091085 -0.11452146077196179 -0.025326942106218664
1,004,999,9996.2 2.9 4.3 1.3 1 -0.6416902782168139-0.019071177408365406-0.20417287674016232 0.02050967222367117
" ], "text/plain": [ "# sepal_length sepal_width petal_length petal_width class_ PCA_0 PCA_1 PCA_2 PCA_3\n", "0 5.9 3.0 4.2 1.5 1 -0.5110980605065719 0.10228410590320294 0.13232789125239366 -0.05010053260756789\n", "1 6.1 3.0 4.6 1.4 1 -0.8901604456484571 0.03381244269907491 -0.009768028904991795 0.1534482059864868\n", "2 6.6 2.9 4.6 1.3 1 -1.0432977809309918 -0.2289569106597803 -0.41481456509035997 0.03752354509774891\n", "3 6.7 3.3 5.7 2.1 2 -2.275853649246034 -0.3333865237191275 0.28467815436304544 0.062230281630705805\n", "4 5.5 4.2 1.4 0.2 0 2.5971594768136956 -1.1000219282272325 0.16358191524058419 0.09895807321522321\n", "... ... ... ... ... ... ... ... ... ...\n", "1,004,999,995 5.2 3.4 1.4 0.2 0 2.6398212682948925 -0.3192900674870881 -0.1392533720548284 -0.06514104909063131\n", "1,004,999,996 5.1 3.8 1.6 0.2 0 2.537573370908207 -0.5103675457748862 0.17191840236558648 0.19216594960009262\n", "1,004,999,997 5.8 2.6 4.0 1.2 1 -0.2288790498772652 0.4022576190683287 -0.22736270650701024 -0.01862045442675292\n", "1,004,999,998 5.7 3.8 1.7 0.3 0 2.199077961161723 -0.8792440894091085 -0.11452146077196179 -0.025326942106218664\n", "1,004,999,999 6.2 2.9 4.3 1.3 1 -0.6416902782168139 -0.019071177408365406 -0.20417287674016232 0.02050967222367117" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_trans = pca.transform(df)\n", "df_trans" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Recall that the transformed DataFrame, which includes the PCA components, takes no extra memory. " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Incremental PCA\n", "\n", "The PCA implementation in vaex is very fast, but more so for \"tall\" DataFrames, i.e. DataFrames that have many rows, but not many columns. For DataFrames that have hundreds of columns, it is more efficient to use an Incremental PCA method. 
`vaex.ml` provides a convenient method that essentially wraps `sklearn.decomposition.IncrementalPCA`, the fitting of which is more efficient for \"wide\" DataFrames. \n", "\n", "The usage is practically identical to the regular PCA method. Consider the following example:" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "ExecuteTime": { "end_time": "2021-04-13T10:44:40.861332Z", "start_time": "2021-04-13T10:44:38.804288Z" } }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "7d86be352dbd45fdb0bf34fce0bebd13", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, max=1.0), Label(value='In progress...')))" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
# feat_0 feat_1 feat_2 feat_3 feat_4 feat_5 feat_6 feat_7 feat_8 feat_9 feat_10 feat_11 feat_12 feat_13 feat_14 feat_15 feat_16 feat_17 feat_18 feat_19 feat_20 feat_21 feat_22 feat_23 feat_24 feat_25 feat_26 feat_27 feat_28 feat_29 feat_30 feat_31 feat_32 feat_33 feat_34 feat_35 feat_36 feat_37 feat_38 feat_39 feat_40 feat_41 feat_42 feat_43 feat_44 feat_45 feat_46 feat_47 feat_48 feat_49 PCA_0 PCA_1 PCA_2 PCA_3 PCA_4 PCA_5 PCA_6 PCA_7 PCA_8 PCA_9
0 0.21916619701436382-1.1435438188965208-2.236473242690611 -8.81728920352771 1.9931414225984159 0.8289809515418928 -7.847441537857684 -5.990636964340006 0.43889103534482576-6.4855757436955965-14.48532696768287113.825392548457543 -5.5661773929038185-3.1816868599382633 27.66565101972783650.541940500115366 16.001390451665785 32.510983357481614 8.342038455860216 -1.7293759207235855-6.451472523437187 22.55340570655327 -2.543125122041264528.75425936065127 -39.487762558467345 -6.871003398404642 11.198673922236354 -86.63832306461876 -7.32368079105989237.35407351193795 23.653897939827836 39.52047029873747 42.79143756690254 -33.3810495394693 33.05317072490505 14.818285601642208 -67.03187283353228 -19.01476952180615 22.4905763733386 35.33833686808974 11.79457050704157 -86.70070654092856 25.185781359852896 20.521240128349977 19.814114866123216 78.05531698592385 10.029892443326418 -97.39820288821723 -0.9603735180566161-64.45083314406774 -67.59977551168708 9.37969253153906 -96.6057651764448 11.206098841188833 74.90790318762694 17.531645576460654 21.26591694292548 27.215113714718253-85.31326664717933 10.507088586039371
1 -0.42076958781498162.3850692704428043 -1.3661921493141755-0.57464980721204832.2588675039630703 -5.100101894797036 -0.0005433423021984177-3.0055202143012365 5.749693220009271 11.379708067727588 10.119772822286162 0.15698369211085733-10.937595546203902-31.110839874678003 -5.593388174686233-17.48851742053923519.942127063793418 -0.6804349583522779-19.03708392463745428.74230527011865 12.40206875918237 -9.990549218761593 -5.733244330514869 3.171827795840886 -43.944372783025386 -25.8820588524763123.517534442545183 -25.10463172872150417.068162563601867-26.188188765123446-17.51765346352225 -5.803234686368941 23.37461204071744 85.58386322836444 -24.84250900935848 42.2583557612343 -34.83625774127584447.25447854289113 -5.903960946365425 47.891908734840925 -9.673715993876817 -17.5774774820285274.066254744412671 -51.377913297883865-11.51987006746566810.497653831847085 16.358701536495925 -18.3914825056028029.858101501060483 -39.819369217021595-38.74298336407881 12.412960580526423 -16.79176108824452714.714058887306741 8.607153125744537 -6.384705477156807 -52.8779915958480663.667728062420572 -19.219755720289232 -16.20164176309122
2 -0.50247974091959910.9897062935454243 -1.152229281759237 -1.682033038083704 -4.091345910790923 -4.52742403771885552.129578282936375 10.936320913755608 -1.5695520680947808-6.034199421988269 -28.46431144964817 -15.32129294377632 -8.194011820344523 -16.218630438043398 12.021916867709596-4.908477966578501 -29.56619559878632 7.772108300044394 7.680046493196698 13.815505542053483 3.9208120473170016 47.34661694033482 1.544881077052938 9.440027347582042 18.56198304730558 22.3336072648248 -21.578332510459486-48.93092663572265616.5701671385727 16.656088505245513 19.8406469884787 5.384567961213235 -16.73392428744861614.376438801233908-35.323974854495155-7.411178531711759 -12.19133679331107557.91740496088699 34.873491696833774 88.28464395597479 87.65337555912684 -2.4096431528212445-7.8171455961597385-4.016403896979926 -22.96261029782406 -75.8940296403038 -38.8951677113029 -89.75675908427556 -79.5994302281645 -44.45310265105787 -42.34987503786076 -74.13417710288375 -94.54423466637282 -40.877591489278196-73.38521818144409 -14.487330945685514-6.8530939766408885-10.84894017617582-0.0388656483260952478.63468911909872
3 0.12617606561304665-0.91728226378698231.8277090696240983 -1.8883963021695365-3.26085343817413436.94314682034098 -1.964291832580844 5.476441728997025 5.985807394356193 -4.152754646002149 15.497819324027216 1.9473222994398216 -11.1546653716116812.1502221820849754 7.402217623202724 -20.974198348221123-18.49611969411084 -11.197532751079477-4.167571500828548 -16.7492676033496866.873971547452746 -22.28958212850625421.69520422160094 10.732001896726413 -24.901621899667955 13.663451847361172 40.92498717076184 62.02571061444625 97.46935359691241 1.3197202988059933 -13.355307678605655-59.98623606960067 -15.3460319107594843.85479178918432068.451030763844253 -37.3610034378942059.316605927851759 -15.936791503025487-14.200047091850191-96.04376311885646 6.793212237372706 -89.28406931570937 -6.342536181747704 9.84276729692308 -44.15480258178421 -19.716315609075178-8.963766643638541 13.328160220454095 -81.91979053839731 -58.49057458242536 -63.82740201878286 -78.04284003367316 6.898497938656784 -9.975022259994258 -24.581867540712196-43.13228076360685 5.384602201485904 -5.104240140134616-88.56822933573116 18.63888133757838
4 -1.5391949931048126-0.84243862338608713.808044749153777 -1.15040861016063344.975092670034785 -4.03814322037485956.475255733889277 -8.492789285986634 -0.71070840841147211.9868439665217876 -6.335098977847596 18.156422121050845 -3.9319838484429286-0.303888675665301 -18.038103704497153.6137256391127717 12.72102405166281 6.1797872895139765 -17.965746423694828-6.457595529218324 -11.1195782584740362.124546751440085 2.074247115486158 48.526431477044895 -47.7501423866134 -13.2189838629703170.7076755883915242 21.272708498626173 20.218314701800175-4.052289437744317 -28.29098298558251744.10471192261346 27.505033879695844 28.4585973718932739.564898635025768 -6.2001475733889375-33.28464087248315 13.562356933449957 72.47202649403566 -17.63088820680735222.257347577113283 19.793786901529828 -0.888840951088124115.45297619768772 80.01687713977846 -33.02953241445338 47.36388577265113 47.96488983389095 30.47783230830538 52.702201767487 56.4647664098084 27.388702583308334 47.716980722531005 48.86243093017444 -29.47766470897874 76.66863902366097 23.114022602360667 -3.03590434662457820.751371509793366 25.70018487608435
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
99,995-1.160081518789358 -1.5967802399231468-2.15232040817518 -5.152880656063202 -2.81607683456671464.528707893808043 -9.219048918475725 -4.1152783877843895 15.434762333635224 -8.352240079142867 3.2341379115026694 7.679896402408659 19.99465474797146 -15.987822176846745 17.610005841221454-2.9940634500799996-36.9849615488119246.455731448290355 0.8700910607593357 -4.458798902046075 -8.573291238859795 1.7866347197434056 -5.748202862095839 -78.73536930217278 0.8664468950376607 -31.185290130437014-33.40360643789874548.79496517134476 4.273021608667145 -14.76645480929473223.034033698309216 47.916505903411704 22.82356373157275 58.17074570864146 13.075446180847607 5.357406097709567 19.301741918502767 30.91481630395726 -18.99658045583839429.068050048521297 -11.50032407194181 -94.16793562743486 10.247859328520715 -23.33364253340996864.88951899816107 -5.970342533069689 22.724974186922207 -46.358784230253264-76.06357310802707 36.34299568143191 34.5263251515797 -74.93722963856585 -51.83676476605647 28.086594105181963 -1.148488347990102264.59414944331482 -19.3363913041026487.146369194433403 -94.50249266159257 -11.416642775370095
99,9960.133221661855605742.0608209742055763 2.1641428725239287 -2.450274442812819 0.5729664553821341 11.655164926233269 -9.864613671442203 -4.600216494861485 10.08600220223909 5.916293624542951 14.812935982731668 -6.453293834403917 -11.90549514770099 -3.26727352515574 1.8764801411441934-20.02012175801679 20.579289884690567 -7.95774658159159 -8.387038826710807 -18.0222209635527342.692329970764943 -14.30398788132729721.66822494391352 -15.938191880312708-35.29052532512791 -8.631818482611655 9.787860087044647 -53.67539155301477 -6.29070859522252334.35010506794386 6.565193250636609 -15.486170359730892-3.031599295669413 -1.80098865175289345.55563650252154 -37.38886935392985 68.02203785140463 69.71021558546443 67.33004345391464 38.09747878907309 -15.32336767996999276.84362563371494 -35.79579407415943 -32.88316495646942 -23.620694143487448-90.01728440515039 -24.77449621235016567.92281355721133 30.03415640434173 -29.32574935340052 -21.82606452589530525.41085028514592 70.39416642353444 -29.213531794756513-90.47462518115402 -14.585892147549302-36.17160238891088 -33.2209566185244976.76852716941656 -18.539072237418367
99,9971.011157114782744 -0.80040986269630711.2571486498281934 3.8492594702419245 0.7592605926849842 -4.098302780814329 -1.9485099180060705 16.684513355922583 10.087604365608211 3.7452922672933973 -16.33173839915188 19.92199866574765 6.5771681345498845 -0.3230579773623871714.72654802079624613.583443459677845 -4.952279711617992 17.030998980346084 4.201801219449127 -3.910793205671661441.77733885408281 7.96614686571076 -39.10848664323428 -33.69630280939279 -7.463352385087283 7.458696462843669 -5.883303405785125 6.6310954865277845 -6.552748916196248-9.325031603876797 -11.7337490011325093.627520914240156 18.155090307885395 33.4073875839576 45.52621736035822 -22.938060053594263-27.364572553649534-58.35071648799318 -62.86375816449011 19.272818436422003 47.61050132614527 -11.301762317420524-82.24660966605563 16.961463120018315 13.762199024990316 9.330554417908111 -96.02479832620445 -24.711048464719337-2.078012378653908 -10.604821752483073-11.558372427683931-3.6825332773046875-23.548620629546026-95.72823548883444 15.77594599796893 14.557196623771969 15.812183077424558 -82.30672442508799-8.68501822662248 44.23079310012721
99,9980.9852518578365336 0.8203281912686264 -3.884122502896842 -0.95908400432742780.16746213933285223-0.8886763063332375-16.842052417441188 0.0198139466128886246.1752951086966466 -18.13326524831207 -0.33033598775980267.829297546305325 -10.4252625074002822.7819145440653568 1.158097590630274 30.6780239575918 -23.9448164051634155.6018938249159245 -35.65399756657973 2.673171211427327 -2.90883222148649 -3.59167991497657157.002401397456594 14.353272681106485 -20.458739593063836 -47.09280369705129 25.90478920629466 1.8398979773599367 20.39037292398545 6.635600259567852 21.290136759712006 -30.6802383525156 -32.70023383447721 -28.294300515770139.030591834969087 41.28614556628407 -3.340280013558715 -6.387187312457969 -6.795058954505738 -29.239868647721906-84.84487823247701 21.53413969040578 -9.656174756794805 85.86389211836673 -54.80830511204367 -30.709179188326925-20.51621281362256680.1393974655775 -15.86831043391858 69.46209659371226 66.36652900849339 -25.10453716959171579.18237523289388 -25.577375106247562-30.87284219351464 -56.81179164164408 83.71581743144066 -9.27379265343866519.727630954137673 -85.96069547051928
99,9990.280172477999310550.8792488188373339 -2.611294241397942 -1.271843401381004 -5.583106681289557 2.0063535490559556 8.803561240522425 5.065652252075632 8.014785992140089 2.726435130640515 12.46703945978122 -0.87624409106155750.313008136552742734.259569516217728 -8.76361980315363527.42697941843017 -18.4957182932119153.2235230804059354 19.09973219172654 -21.25726264511826 -10.180990877752983-1.519950417648088522.71070295724785 29.616379288189506 -0.1316424396912179417.225907298944403 5.9791658138855075 11.74845639489894 -4.90066391424355351.065677623825266 -3.7948783924044243-32.70626521313637 -49.77902739808171 -38.9673863548757 4.223577391775786 -26.91850352108989666.81964173436637 76.24293014754961 -31.65153708363635622.893190015052674 -36.482595175686725-25.30090587669703 -10.0417262668186585.274361409552595 -34.88489743571424498.35907785706063 23.57152847224355 26.457155702616525 -86.30659590503936 12.050979659904716 3.057710144296827 -86.50100893855216 23.845662599505307 27.79510549576583 97.55955420927998 -40.44816836188145 2.789198094433643 -4.188993886405869-29.329836024823493 -40.232345894787784
" ], "text/plain": [ "# feat_0 feat_1 feat_2 feat_3 feat_4 feat_5 feat_6 feat_7 feat_8 feat_9 feat_10 feat_11 feat_12 feat_13 feat_14 feat_15 feat_16 feat_17 feat_18 feat_19 feat_20 feat_21 feat_22 feat_23 feat_24 feat_25 feat_26 feat_27 feat_28 feat_29 feat_30 feat_31 feat_32 feat_33 feat_34 feat_35 feat_36 feat_37 feat_38 feat_39 feat_40 feat_41 feat_42 feat_43 feat_44 feat_45 feat_46 feat_47 feat_48 feat_49 PCA_0 PCA_1 PCA_2 PCA_3 PCA_4 PCA_5 PCA_6 PCA_7 PCA_8 PCA_9\n", "0 0.21916619701436382 -1.1435438188965208 -2.236473242690611 -8.81728920352771 1.9931414225984159 0.8289809515418928 -7.847441537857684 -5.990636964340006 0.43889103534482576 -6.4855757436955965 -14.485326967682871 13.825392548457543 -5.5661773929038185 -3.1816868599382633 27.665651019727836 50.541940500115366 16.001390451665785 32.510983357481614 8.342038455860216 -1.7293759207235855 -6.451472523437187 22.55340570655327 -2.5431251220412645 28.75425936065127 -39.487762558467345 -6.871003398404642 11.198673922236354 -86.63832306461876 -7.323680791059892 37.35407351193795 23.653897939827836 39.52047029873747 42.79143756690254 -33.3810495394693 33.05317072490505 14.818285601642208 -67.03187283353228 -19.01476952180615 22.4905763733386 35.33833686808974 11.79457050704157 -86.70070654092856 25.185781359852896 20.521240128349977 19.814114866123216 78.05531698592385 10.029892443326418 -97.39820288821723 -0.9603735180566161 -64.45083314406774 -67.59977551168708 9.37969253153906 -96.6057651764448 11.206098841188833 74.90790318762694 17.531645576460654 21.26591694292548 27.215113714718253 -85.31326664717933 10.507088586039371\n", "1 -0.4207695878149816 2.3850692704428043 -1.3661921493141755 -0.5746498072120483 2.2588675039630703 -5.100101894797036 -0.0005433423021984177 -3.0055202143012365 5.749693220009271 11.379708067727588 10.119772822286162 0.15698369211085733 -10.937595546203902 -31.110839874678003 -5.593388174686233 -17.488517420539235 19.942127063793418 -0.6804349583522779 -19.037083924637454 
28.74230527011865 12.40206875918237 -9.990549218761593 -5.733244330514869 3.171827795840886 -43.944372783025386 -25.882058852476312 3.517534442545183 -25.104631728721504 17.068162563601867 -26.188188765123446 -17.51765346352225 -5.803234686368941 23.37461204071744 85.58386322836444 -24.84250900935848 42.2583557612343 -34.836257741275844 47.25447854289113 -5.903960946365425 47.891908734840925 -9.673715993876817 -17.577477482028527 4.066254744412671 -51.377913297883865 -11.519870067465668 10.497653831847085 16.358701536495925 -18.391482505602802 9.858101501060483 -39.819369217021595 -38.74298336407881 12.412960580526423 -16.791761088244527 14.714058887306741 8.607153125744537 -6.384705477156807 -52.877991595848066 3.667728062420572 -19.219755720289232 -16.20164176309122\n", "2 -0.5024797409195991 0.9897062935454243 -1.152229281759237 -1.682033038083704 -4.091345910790923 -4.5274240377188555 2.129578282936375 10.936320913755608 -1.5695520680947808 -6.034199421988269 -28.46431144964817 -15.32129294377632 -8.194011820344523 -16.218630438043398 12.021916867709596 -4.908477966578501 -29.56619559878632 7.772108300044394 7.680046493196698 13.815505542053483 3.9208120473170016 47.34661694033482 1.544881077052938 9.440027347582042 18.56198304730558 22.3336072648248 -21.578332510459486 -48.930926635722656 16.5701671385727 16.656088505245513 19.8406469884787 5.384567961213235 -16.733924287448616 14.376438801233908 -35.323974854495155 -7.411178531711759 -12.191336793311075 57.91740496088699 34.873491696833774 88.28464395597479 87.65337555912684 -2.4096431528212445 -7.8171455961597385 -4.016403896979926 -22.96261029782406 -75.8940296403038 -38.8951677113029 -89.75675908427556 -79.5994302281645 -44.45310265105787 -42.34987503786076 -74.13417710288375 -94.54423466637282 -40.877591489278196 -73.38521818144409 -14.487330945685514 -6.8530939766408885 -10.84894017617582 -0.03886564832609524 78.63468911909872\n", "3 0.12617606561304665 -0.9172822637869823 1.8277090696240983 
-1.8883963021695365 -3.2608534381741343 6.94314682034098 -1.964291832580844 5.476441728997025 5.985807394356193 -4.152754646002149 15.497819324027216 1.9473222994398216 -11.154665371611681 2.1502221820849754 7.402217623202724 -20.974198348221123 -18.49611969411084 -11.197532751079477 -4.167571500828548 -16.749267603349686 6.873971547452746 -22.289582128506254 21.69520422160094 10.732001896726413 -24.901621899667955 13.663451847361172 40.92498717076184 62.02571061444625 97.46935359691241 1.3197202988059933 -13.355307678605655 -59.98623606960067 -15.346031910759484 3.8547917891843206 8.451030763844253 -37.361003437894205 9.316605927851759 -15.936791503025487 -14.200047091850191 -96.04376311885646 6.793212237372706 -89.28406931570937 -6.342536181747704 9.84276729692308 -44.15480258178421 -19.716315609075178 -8.963766643638541 13.328160220454095 -81.91979053839731 -58.49057458242536 -63.82740201878286 -78.04284003367316 6.898497938656784 -9.975022259994258 -24.581867540712196 -43.13228076360685 5.384602201485904 -5.104240140134616 -88.56822933573116 18.63888133757838\n", "4 -1.5391949931048126 -0.8424386233860871 3.808044749153777 -1.1504086101606334 4.975092670034785 -4.0381432203748595 6.475255733889277 -8.492789285986634 -0.7107084084114721 1.9868439665217876 -6.335098977847596 18.156422121050845 -3.9319838484429286 -0.303888675665301 -18.03810370449715 3.6137256391127717 12.72102405166281 6.1797872895139765 -17.965746423694828 -6.457595529218324 -11.119578258474036 2.124546751440085 2.074247115486158 48.526431477044895 -47.7501423866134 -13.218983862970317 0.7076755883915242 21.272708498626173 20.218314701800175 -4.052289437744317 -28.290982985582517 44.10471192261346 27.505033879695844 28.458597371893273 9.564898635025768 -6.2001475733889375 -33.28464087248315 13.562356933449957 72.47202649403566 -17.630888206807352 22.257347577113283 19.793786901529828 -0.8888409510881241 15.45297619768772 80.01687713977846 -33.02953241445338 47.36388577265113 47.96488983389095 
30.47783230830538 52.702201767487 56.4647664098084 27.388702583308334 47.716980722531005 48.86243093017444 -29.47766470897874 76.66863902366097 23.114022602360667 -3.035904346624578 20.751371509793366 25.70018487608435\n", "... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n", "99,995 -1.160081518789358 -1.5967802399231468 -2.15232040817518 -5.152880656063202 -2.8160768345667146 4.528707893808043 -9.219048918475725 -4.1152783877843895 15.434762333635224 -8.352240079142867 3.2341379115026694 7.679896402408659 19.99465474797146 -15.987822176846745 17.610005841221454 -2.9940634500799996 -36.984961548811924 6.455731448290355 0.8700910607593357 -4.458798902046075 -8.573291238859795 1.7866347197434056 -5.748202862095839 -78.73536930217278 0.8664468950376607 -31.185290130437014 -33.403606437898745 48.79496517134476 4.273021608667145 -14.766454809294732 23.034033698309216 47.916505903411704 22.82356373157275 58.17074570864146 13.075446180847607 5.357406097709567 19.301741918502767 30.91481630395726 -18.996580455838394 29.068050048521297 -11.50032407194181 -94.16793562743486 10.247859328520715 -23.333642533409968 64.88951899816107 -5.970342533069689 22.724974186922207 -46.358784230253264 -76.06357310802707 36.34299568143191 34.5263251515797 -74.93722963856585 -51.83676476605647 28.086594105181963 -1.1484883479901022 64.59414944331482 -19.336391304102648 7.146369194433403 -94.50249266159257 -11.416642775370095\n", "99,996 0.13322166185560574 2.0608209742055763 2.1641428725239287 -2.450274442812819 0.5729664553821341 11.655164926233269 -9.864613671442203 -4.600216494861485 10.08600220223909 5.916293624542951 14.812935982731668 -6.453293834403917 -11.90549514770099 -3.26727352515574 1.8764801411441934 -20.02012175801679 20.579289884690567 -7.95774658159159 -8.387038826710807 
-18.022220963552734 2.692329970764943 -14.303987881327297 21.66822494391352 -15.938191880312708 -35.29052532512791 -8.631818482611655 9.787860087044647 -53.67539155301477 -6.290708595222523 34.35010506794386 6.565193250636609 -15.486170359730892 -3.031599295669413 -1.800988651752893 45.55563650252154 -37.38886935392985 68.02203785140463 69.71021558546443 67.33004345391464 38.09747878907309 -15.323367679969992 76.84362563371494 -35.79579407415943 -32.88316495646942 -23.620694143487448 -90.01728440515039 -24.774496212350165 67.92281355721133 30.03415640434173 -29.32574935340052 -21.826064525895305 25.41085028514592 70.39416642353444 -29.213531794756513 -90.47462518115402 -14.585892147549302 -36.17160238891088 -33.22095661852449 76.76852716941656 -18.539072237418367\n", "99,997 1.011157114782744 -0.8004098626963071 1.2571486498281934 3.8492594702419245 0.7592605926849842 -4.098302780814329 -1.9485099180060705 16.684513355922583 10.087604365608211 3.7452922672933973 -16.33173839915188 19.92199866574765 6.5771681345498845 -0.32305797736238717 14.726548020796246 13.583443459677845 -4.952279711617992 17.030998980346084 4.201801219449127 -3.9107932056716614 41.77733885408281 7.96614686571076 -39.10848664323428 -33.69630280939279 -7.463352385087283 7.458696462843669 -5.883303405785125 6.6310954865277845 -6.552748916196248 -9.325031603876797 -11.733749001132509 3.627520914240156 18.155090307885395 33.4073875839576 45.52621736035822 -22.938060053594263 -27.364572553649534 -58.35071648799318 -62.86375816449011 19.272818436422003 47.61050132614527 -11.301762317420524 -82.24660966605563 16.961463120018315 13.762199024990316 9.330554417908111 -96.02479832620445 -24.711048464719337 -2.078012378653908 -10.604821752483073 -11.558372427683931 -3.6825332773046875 -23.548620629546026 -95.72823548883444 15.77594599796893 14.557196623771969 15.812183077424558 -82.30672442508799 -8.68501822662248 44.23079310012721\n", "99,998 0.9852518578365336 0.8203281912686264 -3.884122502896842 
-0.9590840043274278 0.16746213933285223 -0.8886763063332375 -16.842052417441188 0.019813946612888624 6.1752951086966466 -18.13326524831207 -0.3303359877598026 7.829297546305325 -10.425262507400282 2.7819145440653568 1.158097590630274 30.6780239575918 -23.944816405163415 5.6018938249159245 -35.65399756657973 2.673171211427327 -2.90883222148649 -3.5916799149765715 7.002401397456594 14.353272681106485 -20.458739593063836 -47.09280369705129 25.90478920629466 1.8398979773599367 20.39037292398545 6.635600259567852 21.290136759712006 -30.6802383525156 -32.70023383447721 -28.29430051577013 9.030591834969087 41.28614556628407 -3.340280013558715 -6.387187312457969 -6.795058954505738 -29.239868647721906 -84.84487823247701 21.53413969040578 -9.656174756794805 85.86389211836673 -54.80830511204367 -30.709179188326925 -20.516212813622566 80.1393974655775 -15.86831043391858 69.46209659371226 66.36652900849339 -25.104537169591715 79.18237523289388 -25.577375106247562 -30.87284219351464 -56.81179164164408 83.71581743144066 -9.273792653438665 19.727630954137673 -85.96069547051928\n", "99,999 0.28017247799931055 0.8792488188373339 -2.611294241397942 -1.271843401381004 -5.583106681289557 2.0063535490559556 8.803561240522425 5.065652252075632 8.014785992140089 2.726435130640515 12.46703945978122 -0.8762440910615575 0.31300813655274273 4.259569516217728 -8.763619803153635 27.42697941843017 -18.495718293211915 3.2235230804059354 19.09973219172654 -21.25726264511826 -10.180990877752983 -1.5199504176480885 22.71070295724785 29.616379288189506 -0.13164243969121794 17.225907298944403 5.9791658138855075 11.74845639489894 -4.900663914243553 51.065677623825266 -3.7948783924044243 -32.70626521313637 -49.77902739808171 -38.9673863548757 4.223577391775786 -26.918503521089896 66.81964173436637 76.24293014754961 -31.651537083636356 22.893190015052674 -36.482595175686725 -25.30090587669703 -10.041726266818658 5.274361409552595 -34.884897435714244 98.35907785706063 23.57152847224355 26.457155702616525 
-86.30659590503936 12.050979659904716 3.057710144296827 -86.50100893855216 23.845662599505307 27.79510549576583 97.55955420927998 -40.44816836188145 2.789198094433643 -4.188993886405869 -29.329836024823493 -40.232345894787784" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "n_samples = 100_000\n", "n_columns = 50\n", "data_dict = {f'feat_{i}': np.random.normal(0, i+1, size=n_samples) for i in range(n_columns)}\n", "df = vaex.from_dict(data_dict)\n", "\n", "\n", "features = df.get_column_names()\n", "pca = vaex.ml.PCAIncremental(n_components=10, features=features, batch_size=42_000)\n", "pca.fit(df, progress='widget')\n", "pca.transform(df)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Note that `scikit-learn` is only required to fit the `PCAIncremental` transformer. The `transform` method does not rely on `scikit-learn` being installed." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\n", "### Random projections\n", "\n", "Random projections is another popular way of doing dimensionality reduction, especially when the dimensionality of the data is very high. `vaex.ml` conveniently wraps both `sklearn.random_projection.GaussianRandomProjection` and `sklearn.random_projection.SparseRandomProjection` in a single `vaex.ml` transformer. " ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "ExecuteTime": { "end_time": "2021-04-13T10:51:49.468518Z", "start_time": "2021-04-13T10:51:49.119344Z" } }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
# feat_0 feat_1 feat_2 feat_3 feat_4 feat_5 feat_6 feat_7 feat_8 feat_9 feat_10 feat_11 feat_12 feat_13 feat_14 feat_15 feat_16 feat_17 feat_18 feat_19 feat_20 feat_21 feat_22 feat_23 feat_24 feat_25 feat_26 feat_27 feat_28 feat_29 feat_30 feat_31 feat_32 feat_33 feat_34 feat_35 feat_36 feat_37 feat_38 feat_39 feat_40 feat_41 feat_42 feat_43 feat_44 feat_45 feat_46 feat_47 feat_48 feat_49 random_projection_0 random_projection_1 random_projection_2 random_projection_3 random_projection_4 random_projection_5 random_projection_6 random_projection_7 random_projection_8 random_projection_9
0 0.21916619701436382-1.1435438188965208-2.236473242690611 -8.81728920352771 1.9931414225984159 0.8289809515418928 -7.847441537857684 -5.990636964340006 0.43889103534482576-6.4855757436955965-14.48532696768287113.825392548457543 -5.5661773929038185-3.1816868599382633 27.66565101972783650.541940500115366 16.001390451665785 32.510983357481614 8.342038455860216 -1.7293759207235855-6.451472523437187 22.55340570655327 -2.543125122041264528.75425936065127 -39.487762558467345 -6.871003398404642 11.198673922236354 -86.63832306461876 -7.32368079105989237.35407351193795 23.653897939827836 39.52047029873747 42.79143756690254 -33.3810495394693 33.05317072490505 14.818285601642208 -67.03187283353228 -19.01476952180615 22.4905763733386 35.33833686808974 11.79457050704157 -86.70070654092856 25.185781359852896 20.521240128349977 19.814114866123216 78.05531698592385 10.029892443326418 -97.39820288821723 -0.9603735180566161-64.45083314406774 -50.62485790513975 -8.969974902164104 -75.59787959901278 -32.23015488522056 -8.839635748773595 25.52280920491688 -67.81125847807398 20.625813141370337 -8.9492512335752 -38.397093148408445
1 -0.42076958781498162.3850692704428043 -1.3661921493141755-0.57464980721204832.2588675039630703 -5.100101894797036 -0.0005433423021984177-3.0055202143012365 5.749693220009271 11.379708067727588 10.119772822286162 0.15698369211085733-10.937595546203902-31.110839874678003 -5.593388174686233-17.48851742053923519.942127063793418 -0.6804349583522779-19.03708392463745428.74230527011865 12.40206875918237 -9.990549218761593 -5.733244330514869 3.171827795840886 -43.944372783025386 -25.8820588524763123.517534442545183 -25.10463172872150417.068162563601867-26.188188765123446-17.51765346352225 -5.803234686368941 23.37461204071744 85.58386322836444 -24.84250900935848 42.2583557612343 -34.83625774127584447.25447854289113 -5.903960946365425 47.891908734840925 -9.673715993876817 -17.5774774820285274.066254744412671 -51.377913297883865-11.51987006746566810.497653831847085 16.358701536495925 -18.3914825056028029.858101501060483 -39.819369217021595-24.167592671736728 -83.6194525409906 -31.474566122257382 -53.51874280599636 -9.295953556730474 12.065310248051029 21.935134361477004 -72.0479982398111 -66.96195351258001 76.22398276816658
2 -0.50247974091959910.9897062935454243 -1.152229281759237 -1.682033038083704 -4.091345910790923 -4.52742403771885552.129578282936375 10.936320913755608 -1.5695520680947808-6.034199421988269 -28.46431144964817 -15.32129294377632 -8.194011820344523 -16.218630438043398 12.021916867709596-4.908477966578501 -29.56619559878632 7.772108300044394 7.680046493196698 13.815505542053483 3.9208120473170016 47.34661694033482 1.544881077052938 9.440027347582042 18.56198304730558 22.3336072648248 -21.578332510459486-48.93092663572265616.5701671385727 16.656088505245513 19.8406469884787 5.384567961213235 -16.73392428744861614.376438801233908-35.323974854495155-7.411178531711759 -12.19133679331107557.91740496088699 34.873491696833774 88.28464395597479 87.65337555912684 -2.4096431528212445-7.8171455961597385-4.016403896979926 -22.96261029782406 -75.8940296403038 -38.8951677113029 -89.75675908427556 -79.5994302281645 -44.45310265105787 -30.370561351797924 -69.21024877654797 -131.21336032017504 -23.81397986098913 90.48694640695885 27.981469036784446 -71.13131857248655 -165.47320481693575 30.36401943353085 -37.55586272094929
3 0.12617606561304665-0.91728226378698231.8277090696240983 -1.8883963021695365-3.26085343817413436.94314682034098 -1.964291832580844 5.476441728997025 5.985807394356193 -4.152754646002149 15.497819324027216 1.9473222994398216 -11.1546653716116812.1502221820849754 7.402217623202724 -20.974198348221123-18.49611969411084 -11.197532751079477-4.167571500828548 -16.7492676033496866.873971547452746 -22.28958212850625421.69520422160094 10.732001896726413 -24.901621899667955 13.663451847361172 40.92498717076184 62.02571061444625 97.46935359691241 1.3197202988059933 -13.355307678605655-59.98623606960067 -15.3460319107594843.85479178918432068.451030763844253 -37.3610034378942059.316605927851759 -15.936791503025487-14.200047091850191-96.04376311885646 6.793212237372706 -89.28406931570937 -6.342536181747704 9.84276729692308 -44.15480258178421 -19.716315609075178-8.963766643638541 13.328160220454095 -81.91979053839731 -58.49057458242536 125.12748803342656 -25.206573635553035 61.805492059522535 15.847357808911099 -76.71575173832926 86.50353271166043 86.55719953897724 64.19018426217575 -109.12935339038033 -76.8186950536783
4 -1.5391949931048126-0.84243862338608713.808044749153777 -1.15040861016063344.975092670034785 -4.03814322037485956.475255733889277 -8.492789285986634 -0.71070840841147211.9868439665217876 -6.335098977847596 18.156422121050845 -3.9319838484429286-0.303888675665301 -18.038103704497153.6137256391127717 12.72102405166281 6.1797872895139765 -17.965746423694828-6.457595529218324 -11.1195782584740362.124546751440085 2.074247115486158 48.526431477044895 -47.7501423866134 -13.2189838629703170.7076755883915242 21.272708498626173 20.218314701800175-4.052289437744317 -28.29098298558251744.10471192261346 27.505033879695844 28.4585973718932739.564898635025768 -6.2001475733889375-33.28464087248315 13.562356933449957 72.47202649403566 -17.63088820680735222.257347577113283 19.793786901529828 -0.888840951088124115.45297619768772 80.01687713977846 -33.02953241445338 47.36388577265113 47.96488983389095 30.47783230830538 52.702201767487 9.100443729937155 -98.2487363365348 -86.04861549617408 -10.27966060169664 57.67907962932948 -74.56592607052885 -16.669282052441403 -26.583518157157688 47.49051485779235 178.45202653205695
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
99,995-1.160081518789358 -1.5967802399231468-2.15232040817518 -5.152880656063202 -2.81607683456671464.528707893808043 -9.219048918475725 -4.1152783877843895 15.434762333635224 -8.352240079142867 3.2341379115026694 7.679896402408659 19.99465474797146 -15.987822176846745 17.610005841221454-2.9940634500799996-36.9849615488119246.455731448290355 0.8700910607593357 -4.458798902046075 -8.573291238859795 1.7866347197434056 -5.748202862095839 -78.73536930217278 0.8664468950376607 -31.185290130437014-33.40360643789874548.79496517134476 4.273021608667145 -14.76645480929473223.034033698309216 47.916505903411704 22.82356373157275 58.17074570864146 13.075446180847607 5.357406097709567 19.301741918502767 30.91481630395726 -18.99658045583839429.068050048521297 -11.50032407194181 -94.16793562743486 10.247859328520715 -23.33364253340996864.88951899816107 -5.970342533069689 22.724974186922207 -46.358784230253264-76.06357310802707 36.34299568143191 79.74173570372625 -120.99425995411295 -158.6863110682003 51.08724948440816 45.49604758883528 -92.51884988772696 -33.86586167918684 -110.19228327900962 10.471099356215348 95.03245666604596
99,9960.133221661855605742.0608209742055763 2.1641428725239287 -2.450274442812819 0.5729664553821341 11.655164926233269 -9.864613671442203 -4.600216494861485 10.08600220223909 5.916293624542951 14.812935982731668 -6.453293834403917 -11.90549514770099 -3.26727352515574 1.8764801411441934-20.02012175801679 20.579289884690567 -7.95774658159159 -8.387038826710807 -18.0222209635527342.692329970764943 -14.30398788132729721.66822494391352 -15.938191880312708-35.29052532512791 -8.631818482611655 9.787860087044647 -53.67539155301477 -6.29070859522252334.35010506794386 6.565193250636609 -15.486170359730892-3.031599295669413 -1.80098865175289345.55563650252154 -37.38886935392985 68.02203785140463 69.71021558546443 67.33004345391464 38.09747878907309 -15.32336767996999276.84362563371494 -35.79579407415943 -32.88316495646942 -23.620694143487448-90.01728440515039 -24.77449621235016567.92281355721133 30.03415640434173 -29.32574935340052 12.801266126889404 17.612236115044166 -31.111396519869256 -160.72849754950767 6.480988179687637 4.231265515946373 -52.555790176785194 -65.21246117529064 35.89601203569984 127.45678271483702
99,9971.011157114782744 -0.80040986269630711.2571486498281934 3.8492594702419245 0.7592605926849842 -4.098302780814329 -1.9485099180060705 16.684513355922583 10.087604365608211 3.7452922672933973 -16.33173839915188 19.92199866574765 6.5771681345498845 -0.3230579773623871714.72654802079624613.583443459677845 -4.952279711617992 17.030998980346084 4.201801219449127 -3.910793205671661441.77733885408281 7.96614686571076 -39.10848664323428 -33.69630280939279 -7.463352385087283 7.458696462843669 -5.883303405785125 6.6310954865277845 -6.552748916196248-9.325031603876797 -11.7337490011325093.627520914240156 18.155090307885395 33.4073875839576 45.52621736035822 -22.938060053594263-27.364572553649534-58.35071648799318 -62.86375816449011 19.272818436422003 47.61050132614527 -11.301762317420524-82.24660966605563 16.961463120018315 13.762199024990316 9.330554417908111 -96.02479832620445 -24.711048464719337-2.078012378653908 -10.604821752483073-2.4863267734391865 -10.434958342024952 -37.55392055999496 6.171867513827003 -29.256283776632728 -72.71591584878013 40.24611847925469 -102.31580552627864 -14.905953231227388 -11.740055851590997
99,9980.9852518578365336 0.8203281912686264 -3.884122502896842 -0.95908400432742780.16746213933285223-0.8886763063332375-16.842052417441188 0.0198139466128886246.1752951086966466 -18.13326524831207 -0.33033598775980267.829297546305325 -10.4252625074002822.7819145440653568 1.158097590630274 30.6780239575918 -23.9448164051634155.6018938249159245 -35.65399756657973 2.673171211427327 -2.90883222148649 -3.59167991497657157.002401397456594 14.353272681106485 -20.458739593063836 -47.09280369705129 25.90478920629466 1.8398979773599367 20.39037292398545 6.635600259567852 21.290136759712006 -30.6802383525156 -32.70023383447721 -28.294300515770139.030591834969087 41.28614556628407 -3.340280013558715 -6.387187312457969 -6.795058954505738 -29.239868647721906-84.84487823247701 21.53413969040578 -9.656174756794805 85.86389211836673 -54.80830511204367 -30.709179188326925-20.51621281362256680.1393974655775 -15.86831043391858 69.46209659371226 -70.00012029923253 198.0368255008663 129.3714720510582 30.652606384505287 -65.3920698996377 49.51640293990293 11.882703005485045 93.26651618256129 35.206089617027985 -61.77494520916369
99,9990.280172477999310550.8792488188373339 -2.611294241397942 -1.271843401381004 -5.583106681289557 2.0063535490559556 8.803561240522425 5.065652252075632 8.014785992140089 2.726435130640515 12.46703945978122 -0.87624409106155750.313008136552742734.259569516217728 -8.76361980315363527.42697941843017 -18.4957182932119153.2235230804059354 19.09973219172654 -21.25726264511826 -10.180990877752983-1.519950417648088522.71070295724785 29.616379288189506 -0.1316424396912179417.225907298944403 5.9791658138855075 11.74845639489894 -4.90066391424355351.065677623825266 -3.7948783924044243-32.70626521313637 -49.77902739808171 -38.9673863548757 4.223577391775786 -26.91850352108989666.81964173436637 76.24293014754961 -31.65153708363635622.893190015052674 -36.482595175686725-25.30090587669703 -10.0417262668186585.274361409552595 -34.88489743571424498.35907785706063 23.57152847224355 26.457155702616525 -86.30659590503936 12.050979659904716 45.50866581430373 33.59123204918983 66.48747993035953 93.58220327847411 -113.34727146050997 34.20894130389669 94.5050429333418 98.6447663145478 -42.700555543235716 -3.632586769281134
" ], "text/plain": [ "# feat_0 feat_1 feat_2 feat_3 feat_4 feat_5 feat_6 feat_7 feat_8 feat_9 feat_10 feat_11 feat_12 feat_13 feat_14 feat_15 feat_16 feat_17 feat_18 feat_19 feat_20 feat_21 feat_22 feat_23 feat_24 feat_25 feat_26 feat_27 feat_28 feat_29 feat_30 feat_31 feat_32 feat_33 feat_34 feat_35 feat_36 feat_37 feat_38 feat_39 feat_40 feat_41 feat_42 feat_43 feat_44 feat_45 feat_46 feat_47 feat_48 feat_49 random_projection_0 random_projection_1 random_projection_2 random_projection_3 random_projection_4 random_projection_5 random_projection_6 random_projection_7 random_projection_8 random_projection_9\n", "0 0.21916619701436382 -1.1435438188965208 -2.236473242690611 -8.81728920352771 1.9931414225984159 0.8289809515418928 -7.847441537857684 -5.990636964340006 0.43889103534482576 -6.4855757436955965 -14.485326967682871 13.825392548457543 -5.5661773929038185 -3.1816868599382633 27.665651019727836 50.541940500115366 16.001390451665785 32.510983357481614 8.342038455860216 -1.7293759207235855 -6.451472523437187 22.55340570655327 -2.5431251220412645 28.75425936065127 -39.487762558467345 -6.871003398404642 11.198673922236354 -86.63832306461876 -7.323680791059892 37.35407351193795 23.653897939827836 39.52047029873747 42.79143756690254 -33.3810495394693 33.05317072490505 14.818285601642208 -67.03187283353228 -19.01476952180615 22.4905763733386 35.33833686808974 11.79457050704157 -86.70070654092856 25.185781359852896 20.521240128349977 19.814114866123216 78.05531698592385 10.029892443326418 -97.39820288821723 -0.9603735180566161 -64.45083314406774 -50.62485790513975 -8.969974902164104 -75.59787959901278 -32.23015488522056 -8.839635748773595 25.52280920491688 -67.81125847807398 20.625813141370337 -8.9492512335752 -38.397093148408445\n", "1 -0.4207695878149816 2.3850692704428043 -1.3661921493141755 -0.5746498072120483 2.2588675039630703 -5.100101894797036 -0.0005433423021984177 -3.0055202143012365 5.749693220009271 11.379708067727588 10.119772822286162 0.15698369211085733 
-10.937595546203902 -31.110839874678003 -5.593388174686233 -17.488517420539235 19.942127063793418 -0.6804349583522779 -19.037083924637454 28.74230527011865 12.40206875918237 -9.990549218761593 -5.733244330514869 3.171827795840886 -43.944372783025386 -25.882058852476312 3.517534442545183 -25.104631728721504 17.068162563601867 -26.188188765123446 -17.51765346352225 -5.803234686368941 23.37461204071744 85.58386322836444 -24.84250900935848 42.2583557612343 -34.836257741275844 47.25447854289113 -5.903960946365425 47.891908734840925 -9.673715993876817 -17.577477482028527 4.066254744412671 -51.377913297883865 -11.519870067465668 10.497653831847085 16.358701536495925 -18.391482505602802 9.858101501060483 -39.819369217021595 -24.167592671736728 -83.6194525409906 -31.474566122257382 -53.51874280599636 -9.295953556730474 12.065310248051029 21.935134361477004 -72.0479982398111 -66.96195351258001 76.22398276816658\n", "2 -0.5024797409195991 0.9897062935454243 -1.152229281759237 -1.682033038083704 -4.091345910790923 -4.5274240377188555 2.129578282936375 10.936320913755608 -1.5695520680947808 -6.034199421988269 -28.46431144964817 -15.32129294377632 -8.194011820344523 -16.218630438043398 12.021916867709596 -4.908477966578501 -29.56619559878632 7.772108300044394 7.680046493196698 13.815505542053483 3.9208120473170016 47.34661694033482 1.544881077052938 9.440027347582042 18.56198304730558 22.3336072648248 -21.578332510459486 -48.930926635722656 16.5701671385727 16.656088505245513 19.8406469884787 5.384567961213235 -16.733924287448616 14.376438801233908 -35.323974854495155 -7.411178531711759 -12.191336793311075 57.91740496088699 34.873491696833774 88.28464395597479 87.65337555912684 -2.4096431528212445 -7.8171455961597385 -4.016403896979926 -22.96261029782406 -75.8940296403038 -38.8951677113029 -89.75675908427556 -79.5994302281645 -44.45310265105787 -30.370561351797924 -69.21024877654797 -131.21336032017504 -23.81397986098913 90.48694640695885 27.981469036784446 -71.13131857248655 
-165.47320481693575 30.36401943353085 -37.55586272094929\n", "3 0.12617606561304665 -0.9172822637869823 1.8277090696240983 -1.8883963021695365 -3.2608534381741343 6.94314682034098 -1.964291832580844 5.476441728997025 5.985807394356193 -4.152754646002149 15.497819324027216 1.9473222994398216 -11.154665371611681 2.1502221820849754 7.402217623202724 -20.974198348221123 -18.49611969411084 -11.197532751079477 -4.167571500828548 -16.749267603349686 6.873971547452746 -22.289582128506254 21.69520422160094 10.732001896726413 -24.901621899667955 13.663451847361172 40.92498717076184 62.02571061444625 97.46935359691241 1.3197202988059933 -13.355307678605655 -59.98623606960067 -15.346031910759484 3.8547917891843206 8.451030763844253 -37.361003437894205 9.316605927851759 -15.936791503025487 -14.200047091850191 -96.04376311885646 6.793212237372706 -89.28406931570937 -6.342536181747704 9.84276729692308 -44.15480258178421 -19.716315609075178 -8.963766643638541 13.328160220454095 -81.91979053839731 -58.49057458242536 125.12748803342656 -25.206573635553035 61.805492059522535 15.847357808911099 -76.71575173832926 86.50353271166043 86.55719953897724 64.19018426217575 -109.12935339038033 -76.8186950536783\n", "4 -1.5391949931048126 -0.8424386233860871 3.808044749153777 -1.1504086101606334 4.975092670034785 -4.0381432203748595 6.475255733889277 -8.492789285986634 -0.7107084084114721 1.9868439665217876 -6.335098977847596 18.156422121050845 -3.9319838484429286 -0.303888675665301 -18.03810370449715 3.6137256391127717 12.72102405166281 6.1797872895139765 -17.965746423694828 -6.457595529218324 -11.119578258474036 2.124546751440085 2.074247115486158 48.526431477044895 -47.7501423866134 -13.218983862970317 0.7076755883915242 21.272708498626173 20.218314701800175 -4.052289437744317 -28.290982985582517 44.10471192261346 27.505033879695844 28.458597371893273 9.564898635025768 -6.2001475733889375 -33.28464087248315 13.562356933449957 72.47202649403566 -17.630888206807352 22.257347577113283 
19.793786901529828 -0.8888409510881241 15.45297619768772 80.01687713977846 -33.02953241445338 47.36388577265113 47.96488983389095 30.47783230830538 52.702201767487 9.100443729937155 -98.2487363365348 -86.04861549617408 -10.27966060169664 57.67907962932948 -74.56592607052885 -16.669282052441403 -26.583518157157688 47.49051485779235 178.45202653205695\n", "... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n", "99,995 -1.160081518789358 -1.5967802399231468 -2.15232040817518 -5.152880656063202 -2.8160768345667146 4.528707893808043 -9.219048918475725 -4.1152783877843895 15.434762333635224 -8.352240079142867 3.2341379115026694 7.679896402408659 19.99465474797146 -15.987822176846745 17.610005841221454 -2.9940634500799996 -36.984961548811924 6.455731448290355 0.8700910607593357 -4.458798902046075 -8.573291238859795 1.7866347197434056 -5.748202862095839 -78.73536930217278 0.8664468950376607 -31.185290130437014 -33.403606437898745 48.79496517134476 4.273021608667145 -14.766454809294732 23.034033698309216 47.916505903411704 22.82356373157275 58.17074570864146 13.075446180847607 5.357406097709567 19.301741918502767 30.91481630395726 -18.996580455838394 29.068050048521297 -11.50032407194181 -94.16793562743486 10.247859328520715 -23.333642533409968 64.88951899816107 -5.970342533069689 22.724974186922207 -46.358784230253264 -76.06357310802707 36.34299568143191 79.74173570372625 -120.99425995411295 -158.6863110682003 51.08724948440816 45.49604758883528 -92.51884988772696 -33.86586167918684 -110.19228327900962 10.471099356215348 95.03245666604596\n", "99,996 0.13322166185560574 2.0608209742055763 2.1641428725239287 -2.450274442812819 0.5729664553821341 11.655164926233269 -9.864613671442203 -4.600216494861485 10.08600220223909 5.916293624542951 14.812935982731668 -6.453293834403917 
-11.90549514770099 -3.26727352515574 1.8764801411441934 -20.02012175801679 20.579289884690567 -7.95774658159159 -8.387038826710807 -18.022220963552734 2.692329970764943 -14.303987881327297 21.66822494391352 -15.938191880312708 -35.29052532512791 -8.631818482611655 9.787860087044647 -53.67539155301477 -6.290708595222523 34.35010506794386 6.565193250636609 -15.486170359730892 -3.031599295669413 -1.800988651752893 45.55563650252154 -37.38886935392985 68.02203785140463 69.71021558546443 67.33004345391464 38.09747878907309 -15.323367679969992 76.84362563371494 -35.79579407415943 -32.88316495646942 -23.620694143487448 -90.01728440515039 -24.774496212350165 67.92281355721133 30.03415640434173 -29.32574935340052 12.801266126889404 17.612236115044166 -31.111396519869256 -160.72849754950767 6.480988179687637 4.231265515946373 -52.555790176785194 -65.21246117529064 35.89601203569984 127.45678271483702\n", "99,997 1.011157114782744 -0.8004098626963071 1.2571486498281934 3.8492594702419245 0.7592605926849842 -4.098302780814329 -1.9485099180060705 16.684513355922583 10.087604365608211 3.7452922672933973 -16.33173839915188 19.92199866574765 6.5771681345498845 -0.32305797736238717 14.726548020796246 13.583443459677845 -4.952279711617992 17.030998980346084 4.201801219449127 -3.9107932056716614 41.77733885408281 7.96614686571076 -39.10848664323428 -33.69630280939279 -7.463352385087283 7.458696462843669 -5.883303405785125 6.6310954865277845 -6.552748916196248 -9.325031603876797 -11.733749001132509 3.627520914240156 18.155090307885395 33.4073875839576 45.52621736035822 -22.938060053594263 -27.364572553649534 -58.35071648799318 -62.86375816449011 19.272818436422003 47.61050132614527 -11.301762317420524 -82.24660966605563 16.961463120018315 13.762199024990316 9.330554417908111 -96.02479832620445 -24.711048464719337 -2.078012378653908 -10.604821752483073 -2.4863267734391865 -10.434958342024952 -37.55392055999496 6.171867513827003 -29.256283776632728 -72.71591584878013 40.24611847925469 
-102.31580552627864 -14.905953231227388 -11.740055851590997\n", "99,998 0.9852518578365336 0.8203281912686264 -3.884122502896842 -0.9590840043274278 0.16746213933285223 -0.8886763063332375 -16.842052417441188 0.019813946612888624 6.1752951086966466 -18.13326524831207 -0.3303359877598026 7.829297546305325 -10.425262507400282 2.7819145440653568 1.158097590630274 30.6780239575918 -23.944816405163415 5.6018938249159245 -35.65399756657973 2.673171211427327 -2.90883222148649 -3.5916799149765715 7.002401397456594 14.353272681106485 -20.458739593063836 -47.09280369705129 25.90478920629466 1.8398979773599367 20.39037292398545 6.635600259567852 21.290136759712006 -30.6802383525156 -32.70023383447721 -28.29430051577013 9.030591834969087 41.28614556628407 -3.340280013558715 -6.387187312457969 -6.795058954505738 -29.239868647721906 -84.84487823247701 21.53413969040578 -9.656174756794805 85.86389211836673 -54.80830511204367 -30.709179188326925 -20.516212813622566 80.1393974655775 -15.86831043391858 69.46209659371226 -70.00012029923253 198.0368255008663 129.3714720510582 30.652606384505287 -65.3920698996377 49.51640293990293 11.882703005485045 93.26651618256129 35.206089617027985 -61.77494520916369\n", "99,999 0.28017247799931055 0.8792488188373339 -2.611294241397942 -1.271843401381004 -5.583106681289557 2.0063535490559556 8.803561240522425 5.065652252075632 8.014785992140089 2.726435130640515 12.46703945978122 -0.8762440910615575 0.31300813655274273 4.259569516217728 -8.763619803153635 27.42697941843017 -18.495718293211915 3.2235230804059354 19.09973219172654 -21.25726264511826 -10.180990877752983 -1.5199504176480885 22.71070295724785 29.616379288189506 -0.13164243969121794 17.225907298944403 5.9791658138855075 11.74845639489894 -4.900663914243553 51.065677623825266 -3.7948783924044243 -32.70626521313637 -49.77902739808171 -38.9673863548757 4.223577391775786 -26.918503521089896 66.81964173436637 76.24293014754961 -31.651537083636356 22.893190015052674 -36.482595175686725 
-25.30090587669703 -10.041726266818658 5.274361409552595 -34.884897435714244 98.35907785706063 23.57152847224355 26.457155702616525 -86.30659590503936 12.050979659904716 45.50866581430373 33.59123204918983 66.48747993035953 93.58220327847411 -113.34727146050997 34.20894130389669 94.5050429333418 98.6447663145478 -42.700555543235716 -3.632586769281134" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rand_proj = vaex.ml.RandomProjections(features=features, n_components=10)\n", "rand_proj.fit(df)\n", "rand_proj.transform(df)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Clustering\n", "\n", "### K-Means\n", "\n", "`vaex.ml` implements a fast and scalable K-Means clustering algorithm. The usage is similar to that of `scikit-learn`." ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "ExecuteTime": { "end_time": "2020-07-14T15:58:55.155153Z", "start_time": "2020-07-14T15:58:54.920720Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Iteration 0, inertia 519.0500000000001\n", "Iteration 1, inertia 156.70447116074328\n", "Iteration 2, inertia 88.70688235734133\n", "Iteration 3, inertia 80.23054939305554\n", "Iteration 4, inertia 79.28654263977778\n", "Iteration 5, inertia 78.94084142614601\n", "Iteration 6, inertia 78.94084142614601\n" ] }, { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
# sepal_length sepal_width petal_length petal_width class_ prediction_kmeans
0 5.9 3.0 4.2 1.5 1 0
1 6.1 3.0 4.6 1.4 1 0
2 6.6 2.9 4.6 1.3 1 0
3 6.7 3.3 5.7 2.1 2 1
4 5.5 4.2 1.4 0.2 0 2
... ... ... ... ... ... ...
145 5.2 3.4 1.4 0.2 0 2
146 5.1 3.8 1.6 0.2 0 2
147 5.8 2.6 4.0 1.2 1 0
148 5.7 3.8 1.7 0.3 0 2
149 6.2 2.9 4.3 1.3 1 0
" ], "text/plain": [ "# sepal_length sepal_width petal_length petal_width class_ prediction_kmeans\n", "0 5.9 3.0 4.2 1.5 1 0\n", "1 6.1 3.0 4.6 1.4 1 0\n", "2 6.6 2.9 4.6 1.3 1 0\n", "3 6.7 3.3 5.7 2.1 2 1\n", "4 5.5 4.2 1.4 0.2 0 2\n", "... ... ... ... ... ... ...\n", "145 5.2 3.4 1.4 0.2 0 2\n", "146 5.1 3.8 1.6 0.2 0 2\n", "147 5.8 2.6 4.0 1.2 1 0\n", "148 5.7 3.8 1.7 0.3 0 2\n", "149 6.2 2.9 4.3 1.3 1 0" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import vaex.ml.cluster\n", "\n", "df = vaex.datasets.iris()\n", "\n", "features = ['petal_length', 'petal_width', 'sepal_length', 'sepal_width']\n", "kmeans = vaex.ml.cluster.KMeans(features=features, n_clusters=3, max_iter=100, verbose=True, random_state=42)\n", "kmeans.fit(df)\n", "\n", "df_trans = kmeans.transform(df)\n", "df_trans" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "K-Means is an unsupervised algorithm, meaning that the predicted cluster labels in the transformed dataset do not necessarily correspond to the class label. We can map the predicted cluster identifiers to match the class labels, making it easier to construct diagnostic plots." ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "ExecuteTime": { "end_time": "2020-07-14T15:58:55.795681Z", "start_time": "2020-07-14T15:58:55.783702Z" } }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
# sepal_length sepal_width petal_length petal_width class_ prediction_kmeans predicted_kmean_map
0 5.9 3.0 4.2 1.5 1 0 1
1 6.1 3.0 4.6 1.4 1 0 1
2 6.6 2.9 4.6 1.3 1 0 1
3 6.7 3.3 5.7 2.1 2 1 2
4 5.5 4.2 1.4 0.2 0 2 0
... ... ... ... ... ... ... ...
145 5.2 3.4 1.4 0.2 0 2 0
146 5.1 3.8 1.6 0.2 0 2 0
147 5.8 2.6 4.0 1.2 1 0 1
148 5.7 3.8 1.7 0.3 0 2 0
149 6.2 2.9 4.3 1.3 1 0 1
" ], "text/plain": [ "# sepal_length sepal_width petal_length petal_width class_ prediction_kmeans predicted_kmean_map\n", "0 5.9 3.0 4.2 1.5 1 0 1\n", "1 6.1 3.0 4.6 1.4 1 0 1\n", "2 6.6 2.9 4.6 1.3 1 0 1\n", "3 6.7 3.3 5.7 2.1 2 1 2\n", "4 5.5 4.2 1.4 0.2 0 2 0\n", "... ... ... ... ... ... ... ...\n", "145 5.2 3.4 1.4 0.2 0 2 0\n", "146 5.1 3.8 1.6 0.2 0 2 0\n", "147 5.8 2.6 4.0 1.2 1 0 1\n", "148 5.7 3.8 1.7 0.3 0 2 0\n", "149 6.2 2.9 4.3 1.3 1 0 1" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_trans['predicted_kmean_map'] = df_trans.prediction_kmeans.map(mapper={0: 1, 1: 2, 2: 0})\n", "df_trans" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now we can construct simple scatter plots, and see that in the case of the Iris dataset, K-Means does a pretty good job splitting the data into 3 classes." ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "ExecuteTime": { "end_time": "2020-07-14T15:58:57.379045Z", "start_time": "2020-07-14T15:58:57.198955Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/jovan/vaex/packages/vaex-core/vaex/viz/mpl.py:205: UserWarning: `scatter` is deprecated and it will be removed in version 5.x. Please use `df.viz.scatter` instead.\n", " warnings.warn('`scatter` is deprecated and it will be removed in version 5.x. 
Please use `df.viz.scatter` instead.')\n" ] }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAA1gAAAFgCAYAAACmKdhBAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAACGSUlEQVR4nOzdd3wcxfnH8c/cXlVv7pViuiku2NRA6L13kgAJhBpaevulJySEHiCUBBJ6IEAglJCEUG2MMRgDpjf3bqvrys7vjzvJOt2efbLudCrf9+vlF/bcaPZZGe+jZ3d2xlhrERERERERkZ7zFTsAERERERGRgUIFloiIiIiISJ6owBIREREREckTFVgiIiIiIiJ5ogJLREREREQkT1RgiYiIiIiI5IkKLJEcGGO+b4y5Ld99cxjLGmO27ObX7GOMWZiP44uISP9ljLnDGPOL1O/3Msa810vHVe6SQU0Flgw6xpgzjDHzjDHNxpilxpibjDFVG/oaa+2vrLVfy2X87vQVEZHBzRjzqTGmxRjTaIxZZoz5szGmLN/Hsda+YK3dOod4zjDGvJjv44sMJiqwZFAxxlwOXAF8C6gEpgPjgGeMMcEsX+PvvQhFRGQQOsJaWwZMAqYCP+zaQblIpP9QgSWDhjGmAvgpcJG19ilrbcxa+ylwIski6/RUv58YYx40xtxljKkHzki13dVprC8bYz4zxqwyxvwodQdy/05ff1fq9+NTUyW+Yoz53Biz0hjzg07j7GqMmWGMWWuMWWKMuSFboedxPjWpO52LjTFrjDGPZOn3XWPMR8aYBmPMO8aYYzp9tqUx5jljzLpUbPen2o0x5mpjzPLUZ28aY3ZIfRYyxlyZOp9lxpibjTGR1Gd1xpjHU+ez2hjzgjFG1xkRkRxYaxcBTwLt11trjLnAGPMB8EGq7XBjzBup6+zLxpgd27/eGLOLMWZO6np/PxDu9FnaFDxjzBhjzN+NMStSuewGY8y2wM3AbqknamtTfbNe91OffyuVwxYbY87a0Dkqd8lgoP95ZDDZnWSy+XvnRmttI8mEdkCn5qOAB4Eq4O7O/Y0x2wE3AqcBI0g+CRu1kWPvCWwN7Af8OJXEABLApUAdsFvq8/NzPJ+/AiXA9sBQ4Oos/T4C9krF+VPgLmPMiNRnPwf+BVQDo4HrU+0HAnsDW5H8HpwErEp9dkWqfWdgS5Ln/uPUZ5cDC4EhwDDg+4DN8XxERAY1Y8wY4FDg9U7NRwPTgO2MMZOAPwFfB2qBPwL/SBUPQeARkrmhBvgbcFyW4zjA48BnwHiS1/H7rLXzgXOBGdbaMmttVepLsl73jTEHA98kmUMnAPtv5DSVu2TAU4Elg0kdsNJaG/f4bEnq83YzrLWPWGtda21Ll77HA49Za1+01kZJXqA3diH+qbW2xVo7F5gL7ARgrX3NWjvTWhtPPU37I/CFjZ1IKskcApxrrV2Tehr3nFdfa+3frLWLU+dyP8m7oLumPo6RfHo30lrbaq19sVN7ObANYKy18621S4wxBjgbuNRau9pa2wD8Cji509eNAMalYnrBWqskJSKyYY+knha9CDxH8rra7tep620LyevvH621r1hrE9baO4E2ktPdpwMB4JrU9fdB4NUsx9sVGAl8y1rb1OX6nyaH6/6JwJ+ttW9Za5uAn2Q7SeUuGSxUYMlgshKoM97z2EekPm+3YAPjjOz8ubW2mfV3yLJZ2un3zUAZgDFmq9S0hKUmOR3xV6QXetmMAVZba9dsrKNJTmdsn06yluTUk/ZjfBswwCxjzNvtUzustf8FbgD+ACwzxtxiklMsh5C88/hap/GeSrUD/A74EPiXMeZjY8x3czgXEZHB7mhrbZW1dpy19vwuN/Y656NxwOXt19/UNXgMybw0EljUpTD4LMvxxgCfZbnh2NXGrvtpOXEDx2w
/rnKXDHgqsGQwmUHyTt+xnRuNMaUk76j9p1Pzhu5cLSE5JaH96yMkp2psipuAd4EJ1toKktMSTA5ftwCoMRtZ/dAYMw64FbgQqE1N93ir/RjW2qXW2rOttSNJTjm50aSW1rXWXmetnUxyGsdWJBcGWQm0ANunfhiostZWpl7OxlrbYK293Fq7OXAEcJkxZr9ufD9ERCRd53y0APhlp+tvlbW2xFp7L8ncNCr1tKbd2CxjLgDGZrnh2DX/bfC6nzrumByO2X5c5S4Z8FRgyaBhrV1Hch739caYg40xAWPMeJLz1BeSnBeeiweBI4wxu6fmvP+U3IoiL+VAPdBojNkGOC+XL7LWLiH53tiNxpjq1Lns7dG1lGSyXAFgjDmT1MvTqT+fYIxpLxbXpPomjDFTjTHTjDEBoAloBRLWWpdk0rvaGDM0NcYoY8xBqd8fnnr52KTOK5H6JSIiPXcrcG7q+myMMaXGmMOMMeUkbyLGgW8YY/zGmGNZP6Wuq1kkC6PfpMYIG2P2SH22DBidym9s7LoPPEByMajtjDElwP9lC165SwYLFVgyqFhrf0vyKdGVJC+ir5C8o7aftbYtxzHeBi4C7iOZoBqA5SSfjnXXN4FTU2PcCtzfja/9Esl54++mjn+JR6zvAL8nmXiXAROBlzp1mQq8YoxpBP4BXGyt/QSoSMWzhuR0j1Ukv2cA3yE5lWJmalrjv0ku4AHJF5z/DTSmjnmjtfZ/3TgnERHJwlo7m+S7RDeQvD5/CJyR+ixKcobGGanPTqLLok6dxkmQfFKzJfA5yZuMJ6U+/i/wNrDUGNM+dT7rdd9a+yRwTerrPkz9d0OUu2TAM3qHT6RnTHJDyLUkp/l9UuRwRERERKSI9ARLZBMYY44wxpSk3t+6EpgHfFrcqERERESk2FRgiWyao4DFqV8TgJO1pKuIiIiIaIqgiIiIiIhInugJloiIiIiISJ547X/Q59XV1dnx48cXOwwRESmQ1157baW1dsjGe/ZdylUiIgNbtlzVLwus8ePHM3v27GKHISIiBWKM+azYMfSUcpWIyMCWLVdpiqCIiIiIiEieqMASERERERHJExVYIiIiIiIieaICS0REREREJE9UYImIiIiIiOSJCiwREREREZE8UYElIiIiIiKSJyqwRERERERE8qSgBZYxZowx5lljzHxjzNvGmIs9+uxjjFlnjHkj9evHhYxJRKQnrLsOt/7nuMt3x12+F27D1Vjb2v1xWv+Lu/Jo3GVTcVd/CRudi7UWt+ke3BUH4C6bhrvmYmz88wKchbRTnhKRgchGX8NddVoyx6w8Dtv2XPfHcJtxG36Hu3xP3OV74Nb/Gus2YhNLcdd9N5mnlu+L23gb1sYLcBb9l7/A48eBy621c4wx5cBrxphnrLXvdOn3grX28ALHIiLSI9ZGsatOhMRCIJZsbPoTNvoK1NyLMSancdzmR6D+/4CWZEP0FezqL0FoX2j73/r2tqex0Zeg7nGMMzy/JyPtlKdEZECx0VnY1V8DUjf/4vOway7CVl6BL3JIbmNYF7v6dIh/ALQlG5vvxra9AO4asGuBBNg10HgdNv4OpuqqApxN/1TQJ1jW2iXW2jmp3zcA84FRhTymiEjBtD4DiWV0FFcAtEH8XYjNzmkIa11ovIKOImr94ND2VJd2F2wLtumOnkQtG6A8JSIDja2/go7iqkMrNPwGa21ug0RnQOJjOoqrZCMkPgNbDyTSx259RjMuOum1d7CMMeOBXYBXPD7ezRgz1xjzpDFm+96KSUSkO2zsTaDZ44M4xLo+8Mg2SD249dk+9GiLQTS34k16RnlKRAaE+Pve7e5y0gumDYi9Bdarb4z0m4wpJgDx+TkGOPD1SoFljCkDHgIusdZ2/cliDjDOWrsTcD3wSJYxzjHGzDbGzF6xYkVB4xUR8eSMBSKZ7SYATo4PPUxZsr/3hx5tPvBvlmOAsqnykadS4yhXiUhxOUO9200JEMxxjNFgwh4f+AE
ns9kmcs+Dg0DBCyxjTIBk0rrbWvv3rp9ba+uttY2p3z8BBIwxdR79brHWTrHWThkyZEihwxYRyWAiR6aKo86FkA9MOYS+kNsYxg8lXyajUDMRcLYmM/mFMKVf3fSgZaPyladSnytXiUhxlV5I5s3ACJR+FWNy/NE/vH8yL6WVCiZVpHW9SehP3gj06+F+u0KvImiA24H51lrPN9+MMcNT/TDG7JqKaVUh4xIR2RTGV46puRf825G8i+eHwGRM7X2YrE+lPMYpuxhKTieZAENgKqHsO5jaeyF8IMkiKwi+kZjqGzCBbQpyPqI8JSIDj6/kGCi/PHnzj1CyKCo9C1N6bs5jGBPC1NwHgZ1I5rsABCZiah/A1NwBzmbJNgIQ2htT8+ecF3oaDEzOL7ttyuDG7Am8AMwD3FTz94GxANbam40xFwLnkVzJqQW4zFr78obGnTJlip09W+8kiEjxWHcd4GB8ZZs+ho2C2wC+KoxxOrW3gG0BUz1oE5Yx5jVr7ZReOE5B8hQoV4lIcVkbB3ct+CowJsepgV7juA2AxfgqurSvBkIYX2mP4uzPsuWqgi7Tbq19Ee+XCjr3uQG4oZBxiIjkm/FV9nwMEwSn1qM9kpqaIYWmPCUiA5UxfnA8ZzN3bxxfeZb2mh6PPVD12iqCIiIiIiIiA50KLBERERERkTwp6BRBEZF8sG4ztulGaHkk2RA+HFN2YY/ef9oY122GNRdAbCbggjMeqq7HF9iqYMcUEZH+a97yZfzu5Rd4a/kyRpSV841pu3HQFhMKeky37QVY9/3UHld+CB8OFb/G59MzlGLSd19E+jRrXezq06HpjmQCcZdD813Y1adgbWKjX7/JVh4AsZdI7lZvIfEJrDoSN76scMcUEZF+6a3lyzj5wft48fPPWNvayvyVK7js6Se49603C3ZMN/oarPkquMtIblQfg9aHYfXJBTum5EYFloj0bdGXIfExEO3cCIkF0PZcQQ7ptv4XXK9NYl1o+FVBjikiIv3X715+gZZ4PK2tJR7ndy+9QMJ1s3xVD637kXd7/A3c+OLCHFNyogJLRPq22Ftg2zLbbTPE3y7MMdv+t4F43ijMMUVEpN96a/lyz/aWeJxVLc2FOWhiQfbPohvdSUIKSAWWiPRtzmgwYY8PSsA3qjDH9G/gPStndGGOKSIi/dao8ixLmRuoDHnlsDzY0HYh/m0Lc0zJiQosEenbwgekCqzOlysDJgThgwtzzMipJHeo91D+3cIcU0RE+q1vTNuNsD997biI388pO+xIyF+gNeXKL/Nu9w3BF9y+MMeUnKjAEpE+zZgQpuZ+COxIsujxg38HTO29GF9JQY7p8/mg9hEw1Z1aA1DxK3zBiQU5poiI9F/7b74lP/nCF6kKhwk5DuFUcfW9Pb9QsGP6IsdC6fmk/TjvGw11jxXsmJIbLdMuIn2e8Y/F1D6AdesBi9nQtIg88QUmwLBXUqsGNuHzb17wY4qISP914vYTOW7b7Vnd0kJFKFS4J1ed+MovwS39BiQ+BKcOn6+m4MeUjVOBJSL9hvFV9Poxff5hvX5MERHpnxyfjyGlpb16TJ/PBz7t0diXaIqgiIiIiIhInqjAEhERERERyRMVWCLS51m3Gbfh97jL98Jdvidu/RVYtxGbWIa77nu4y3fDXfFF3MY/YW0CG3sPd/U5uMum4a48HNvyRHKctpdxV52Iu2xX3FWnYKOvbkIs9bj1v8Rdvgfu8r1xG67F2tbs/WPzcFefkYrlGGzrf7P3tRa36T7cFQfiLpuOu/ZSbPzzbsdYDDaxFHfdd1J/F/vhNt2BtYlihyUi0mts7G3c1WelrvdHYVufSba3PYe78rhk++ovYaNvJK/3zQ/grjgo2b7mYmz8c6yN4jZcj7v8C7jLd8dd9zOsu7b7sUTn4K46PTn2quOxbS9k72td3Ka/4K7YP5V7vo1NLMneP7Ecd933O+Xe2/vN9d62Pou78tjU38VXsNG5BTmOsdYWZOB
CmjJlip09e3axwxCRXmCti111PMQ/ANo3HA6CMw7sGnDXAO0X9jCEpkN0FtgWoP36FoHIYdDyONC5GApjqm/ChPbIMZYoduWRkFgIRFOtIQjsgKm5B2NMev/Ym9hVp2cck4r/w1dyXMb4bv2voOX+VOwAPjBlmLp/Ypy++y6YdddiVx4C7lrS/i7Ch+CrumKTxjTGvGatnZKvGItBuUpk8LCxd7CrTgFaOrWGIXwEtD5GRh4IHQBt/+7U3wemFPxbQ2we6/NdAJzhmLonMCaUWyzRV7Grv5p5zMrf4otkbm/irvsBtHSO0QFTgRnyJKbLohnWrceuPDh1vY93Os/98VVdlVN8xeI2/wPqf0jGzwE1d2KCu2zSmNlylZ5giUjfFp0BiY9Zn2wAopD4BNx1rP+BHqAV2p7vUlwBtEDLQ6RfVJP9bcNvco+l9d+QWMr64opkXPH5EHsto7ttuNLzmDT8Fmvd9L7uami+t1NxBeCCbcE2/Tn3GIvANt8HbhMZfxetT2ATi4oVlohIr7ENvye9uILkdfBBPPNA2+Nd+rtgmyH2Oun5LgbuKmh9ohuxXOF9zIZf0/XBik0shZZHu/RPgG3GNt2dOXbz38BtZH1xlRq79Zk+PePCWguNv8H754Ar8348FVgi0rfF5oFt8/ggDsQ82i3pxVXndq9hPsw5FBubCzR7fBCH2FuZ7bF3sgzUBHZtZhwm6NE5BtE+/hQk+iqZSQswAYjN7/VwRER6nVcOALLmHs/2BOBmNttmbHRON2J5z7vdXU7GtTo2P0vuaUtd27uOvYHrfTxLzusLbEPqqZuHAsStAktE+jZnNHhOi/DjfQkzHm0b4KvrRixjgIjHIQPgjPLoPzzbQGDKusQxAmzUo68P/ONzj7EY/JvhueuHTXh/X0REBhpnRDe/IFuu8sprYXDGdyOWoVkOGQG65FNndPImYeYg4LX/o5Pteu/27eu9KclSSAK+/E/BV4ElIn1b+IBUUuh8uTJACRmJAn+qCOraHgb/jqlxOotA6Xk5h2IiRyaLqTTJ96QI7ZPZv+wiMguyMJSciulyoTf+MRCcCnRNACFM6VdzjrEYTMmXgK7flwD4J2AC2xYjJBGRXmXKLsTzeu/fMfnfNBFwtsbreo+vlowfz40fU3JM7sGUnu8RSwRKv4ox6WObwAQIbEfGNdwEMaVfzhjalJzmUaj4wT8O/DvkHmMvM8YPJV/G6/uS/LvLLxVYItKnGRPC1NwPgYkkE0AA/Dtg6h7A1NyeXOyCYLI9uCem9gGo+CWYGpKFVggiR0PN3VB6dvIlYkLJ/5ZdgCk5JfdYfBWYmnvAv+36WAK7YGrvw2QUXmDCB0H5d8FUpmKJJIur8m96j191HYT3T51PEHwjMNXX9fkixfjHYWpuTRW3qb+L0F6YmtuKHZqISK8w4f2h4gedrvdhKDkJau6Bkq+Q/ME+BKYcyi/H1N4H4YNYf70fhqm6DlP7IASmsj7fbYWpuStjsYkNxhI5FsovTR6LUPLmYulXMFluKJrqW1I3CQPJWJzRmKqbMR5PsIx/NKb6tszcW/PnjIWe+hpTdjGUnM76v4sKKP8WJnJ4/o+lVQRFpL+w7jrAYnxV69usBXc1mBDGV9ap3U2+GOwrx5hwp/Zoch62r9qzKMo9ljWAg/FVbLyvjSdXO/RVZjy58h67Ofmys6+2zyeszrL9XWwKrSIoIv2RtYlU7qlMW/Vvfe6pST5NaW93m5Pv5frq0q731m0AYt0qrDJjiSevyb6qHHNPU3KhpRxyTz6v970t+XexLvVzgMd0x27Ilqt6NqqISC8yvsrMNmPAqfVo94EzxKM9mH1+erdiqc69r/F7xpJ97BKSUyD7l2x/FyIig4UxjmeOyZZ7sl3vja88D7H4u5XvjK8UKM1x7P57vU/+XeSekzeFpgiKiIiIiIjkiQosERERERGRPFGBJSLiwdoYbuMtuMu/iLt8d9x1P8YmVmXvH1+Iu/Zy3OW
74a44ELfpnowNHfsTaxO4TXfirtg/eU7rvpfckFJERPoMG/8Md83FuMum4644BLf5oQ3mHtv6L9yVR+Eum4a7+mvYbPs19hM2sQh37Tf7XO7VO1giIh7s2kuh7Xk6NlRseRDb9j+oezI1T71T38QK7KpjkhsZ4gKroOEKbOJjTMUPezny/LD1P4CWJ4GWZEPLI53Ov6qIkYmICCSLC7vq2OQiGbiQWA31P8MmPsOUX5bR3226Bxp+Q0dei76AXfUq1N7X51er9WITK7ArjwFbT3ru/QhT8aOixqYnWCIiXdj4h+nFFQBxcNdhWx7J7N98R3LVP9xOrS3QfN8Gn3r1VTaxCFoep6O4AiABbhO2+f5ihSUiIp3YxluTq/51zT1Nf06tQtipr41D4+9Jz2sWaMU2XFPwWAvBNv8lS+69v+i5VwWWiEhXsbfBOB4ftEDs1czm6KtALLPdhCD+Xr6jK7zY/Cw73rdCdFavhyMiIh5irwHxzHYTgPjH6W3uCrAeeQoL8TcLEV3hRWcB0cx2E4L4u70eTmcqsEREunJGkbyz11UQnM0ym/2b4Xk5tbHUWP2MMxJIeHzgB//4Xg5GREQ8OWMBj/2qbAyc4eltviq88xrgG5nnwHpJH869KrBERLoKTAbfCDJeUzV+TMlJGd1NyVkkd7TvLAjBXTD+cYWKsmBMYDtwtgS6bsQcwJR8uRghiYhIF6b0HCDUpTUEoT0xzrD0viYCkeOAcJf+EUzZhQWMsnCy5t7ATpgi3wxUgSUi0oUxBlPzVwhOI1lkJJ9cmeo7MF3vCgImsDWm+g+poiyU/JrQvpiqP/Ry5Pljam6H0J6sP/8xmJpb+2XBKCIyEJngTpiqq8A3lGTuCUL4QEzl7737V/wASo5P9Q2BqYKKH2LC+/Ze0HlkAlthqm/MzL3VNxU7NExfWMqwu6ZMmWJnz55d7DBEZBCwbgPYKCaHHeutteCuBFOSsdJgf2XdRrCt4KvFGI+pKAVijHnNWjul1w5YAMpVItIbrHVTuacM4yvJoX8ruOvAV4fxfN+4fylm7s2Wq7RMu4jIBhhfee59jQFnSAGj6X3GVwaUFTsMERHJwhgfOEO70T8MTtepgv1XX8y9miIoIiIiIiKSJyqwRERERERE8kRTBEVk0LBtM7GN10D8U/BvgSm/NLliYOvj2KabwV0FgSmY8svAGYtt+jO03Au2DUL7Y8q+ASaIbbwBWv8JOBA5HlN2DsZ0XckpdczYW9iGqyD2DjgjMWUX9bkXim3r09jGGyGxLLn6UvllmMDWxQ5LRGTQsTaKbboVmh8EohA+GFN2EWCxDddD29NACEpOwJR+FRJLsA1XQ2wmmBoo/RomcgzE5mIbr4LY++Afgym7BBPaI8sxXWzzPdB8J9hGCO2NKbvUc1GnYrFuA7bxD6nc64PIcZiyr2fNvcWmRS5EZFCwrc9i115M+i72YQgfCm1Pgm1JtfnARCCwC0Rnd+rvB18dEAF3Ies3Fg5BYEdMzV0Zi0DY2DzsqtO6HDMCFf+Hr+TYvJ/jpnCb/gqNV3Y6fwMmgqn5GyYwoWhxaZELERmM3NVndck9geSeVtYFdznrc08YAjtB/B2wTYCbao9A+EBofZqMfFf5O3yRgzKPue6H0PIY0J4HHDAVmCFPYnw1eT/H7rI2hl15FCQ+Iz33TsTU3N2rCzB1lS1XaYqgiAwKtuFXpCcbkn9ufbhTcQHgJv8cndGlfxzc1V2KK4A2iL8NsTkex7zS45gt0HBFctWnIrM2Co1Xdzl/C7YF23ht0eISERmMbGwexF4jPW/EkrML3BWk555WiM0G28z64gqgBVr/gWe+a/gVXR+s2MRSaHmE9cUVQAJsc/KpVl/Q9h9wF5OZe99Jfg/6IBVYIjLgWeum7nx5furR5pKesNpFSb/Atw8Rh9i8zPbY21kO2QR2bZZ4elFiKdiExwcWYm/0djQiIoNb7C3wnFkWTf3qygWyXMO9uMvJKLxi88F
03awXoA3aZm0g2N5jo3NThWTXD+LJ71kfpAJLRAY8Y3zJDRW7xevy6OD56qoJgDPKo3u2+esOmD6w9LmvBu9CEu/zERGRwnFGgue+VH6S+aerbv4YbyIkN+TtfMzRyUIlMxjwb9a98QvE+MekYu/6QSD5PeuDVGCJyOBQeg7Q9QIdAf92ZCQcQuAbRkYxZYJA171DfGBKIbRPxiFN2YUeSSEMJadgPO8Y9i7jK4PIUWSeUxhTdkExQhIRGbyCe4KvioxiyoTIzFMO+GrJvH6HwL+NR+6JQOlZyRuOnYcOTIDAtkCgyzGDmNIvb8JJFED4cDLi68i9fWvRqHYqsERkUDClX4XSr4IpAcLJC3PZeVBzb3KhC4IkC6taqLwCU/s3CE4jeVEPgrMZpvpOTO294N861R6AwM6YmvswpuvFH0z4YCj7NpiK5DEJQ8nJmPJv9t6Jb4Sp+DFEjiaZvMNgqqHiJ5jQ3kWOTERkcDHGwdTcC4FJrM89E5KLKNX+FZwtkm0Ekive1j4Ilb8H31CS1/BgcoGLmnuh7OLUTIlwstgq+TKm9Hzv41bfAqG9U8cMgW8UpuomjH+L3jnxjTC+CkzNPcnCsSP37oSpubdP3Kz0olUERWRQsTaaXKzCV5N2YbZuM9gG8A1Ju8Nn3QawbRinLn0cdzXgYHyVORwznlwC3lfVZ5eUtbYV3HXgq8N4TlHpXVpFUEQGM+uuAxvHOLXp7YlVYAIYX8X6NusmF8Ew5RhfSaf2WCr31ORUiFi3Mbnoka+uqCvzbUgy9/owvqpihwJkz1XaB0tEBhVjgp7vRiWTUolHezlQ7tGe+9K1xvjBGdadMHudMWFwuk41ERGRYsh2865rwQWp94w9cowxgQ28C+x1zDKgD7wfvAF9Ydn4XGiKoIiIiIiISJ6owBIREREREcmTgk4RNMaMAf4CDCe5FvAt1tpru/QxwLXAoUAzcIa1NnPHThEZUGzrM9jGG8FdAv6dMOWXgn8rbPO90Hwn2EYIfQFTdjH4KrGNf4SWhwELkSMxpeclN0JsvBbank0uXlFyOqbk9D7xDlEx2fin2IarIPZqcu596dchfES35tRb62Kb74bmvyT3Hwntgyn7BqaPT3XsLuUpEcmmMRrlhlkz+Mf77+IzhmO32Z7zp+7K6pYWrprxEs9//ikVwRBn7jKZU3bYEROfj224GuLzwDcCU3YRJvxFbNuLyVyVWAD+bTBll2KCOxX79IrOtjyObfojuCuTi3aUX4rxb969MRKLsQ3XQPTF5IJSJWdiSk4s+jtkBV3kwhgzAhhhrZ1jjCkHXgOOtta+06nPocBFJBPXNOBaa+20DY2rF4dF+je36V5o+A3rd443YMIQ3Bvanu/U7gAV4B8B8Y+AtlR7EJzxycUq7FqgfQ+PCIQPxFf1u146k77HxhdgVx2V2pQxtceViUDp2fjKLsx5HHfd96DlCdb/XfiTi3TUPdErLxf31iIXhcpToFwl0p/FXZcj7v0rn6xdQzSR3Mw35DhsVVvHovp61rW1kkj9DB3x+7lg5wrO3fwakhv5tv9sHYbwUdD6KOkb/IYxNXdggpN674T6GLfxFmj8A+tzjA9MBFP7CMY/LqcxbGIlduWhYOtZv6djBEpOwFfxwwJEnSlbriroFEFr7ZL2u3zW2gZgPtB198qjgL/YpJlAVSrhicgAZG0cGn/P+osqgE2uXNT2TJf2BNAI8Q9YX1wBRCHxCdh1rC+uSH5t61PY+IKCxd/X2aY/Jr+XnTcQti3QeAvWbcptjMRiaHmc9L+LOLgN2Ob78xlu0SlPiYiX/37yEQvr13UUVwBtiQTzV66gIdrWUVwBtMTjbFdyN5YW1hdXAK3Q+gDpxVWy3Tb8tpDh92nWtnQprgBcsC3YpptyH6f5zvSbiZAcs/m+5GqLRdRr72AZY8YDuwCvdPloFND5p6GFZCY3ERko3OVgo1k+9HqiHkv9yrHdBCD+Tmb7YBF9jWRh2oXxQ+L
T3MaIvZP8PmZog+isHgTXtylPiUi7uUuX0hTLzDFx1yXmuhntE6uX4z0pLctMsfj8HsXXr8U/B+NVgripHJaj6CzA4+cJE4L4u5saXV70SoFljCkDHgIusdbWd/3Y40sy/m80xpxjjJltjJm9YsWKQoQpIr3BVJE14Xh/Ad6vi/rJ2O0ewCbAGbkpkQ0MzhjvdhsFX47vTzkj8SzS8IN//CYG1rflI0+lxlGuEhkARldWEvFn5h6/Mfg83u9Z0tzN5c19Qzc1tP7PGQLW68Yp2XOYF/94PEsZGwOnuPfACl5gGWMCJJPW3dbav3t0WQh0/m6OBhZ37WStvcVaO8VaO2XIkCGFCVZECs74SiByDNB1z6VI8r0quj45CYEpJf1yZZKLWmT0TRUA/h3yGXK/Ysq+Tub3NgShfTM2S846RmA7cLYgs7ANYEq+nIco+5Z85SlQrhIZKA6fsDVBx0m7u+IzhrJQiKCTfnMv4PPxxJIDgEiXUcLgn+jRHoHSC/IfdD9hfDUQ3h8IdfkkjCk7N/dxSs4Cum6gHITAjpgi3wwsaIGVWnnpdmC+tfaqLN3+AXzZJE0H1llrlxQyLhEpLlPxQ4gcS/LiGgZTCRU/wNTeD6E9SRZOQfCNxFTfjKn9G/i3T7UHwL8tpvY+TM1tqbtdoWR7cHdMzZ+LvnpQMZngZKj8LfjqSH5fghA+CFPVvfn+puZ2CLb/XYTAGY2p/mPOLx/3F8pTIuKlPBTi/uNPZpu6IQR8DgGfj4lDh/Hwiafxx8OOYmR5OSHHIeBz2Gf8Zpy7x/eg/HvJfEYYCEHJCVBzN5ScRrLICoMph/JL8JUcXdTzKzZT+RuIHEoyT4XAVwuVv8YEd819jMDWmOobwDecjp8DQvtgqnN/j6tQCr2K4J7AC8A81r+B9n1gLIC19uZUcrsBOJjk8rdnWms3uOySVmYSGRisbQF3HfiGpC2tbt3G5IurviFpxZJ1VwPpO7lba8FdkVx9yFfee8H3cda6yffdTEXyqeGmjuM2JBfJ6PJ3UWi9uIpgQfIUKFeJDBSrmpvxGUN1ZP2TKGsty5uaKAkEKA+FOrXHk8uO+6owJtypvQ3cNeCrxXi+4zo4Wbc5uQqgbyjG872sHMawNpXvSjG+bk7V7KFsuaqg+2BZa1/Ee+565z4WGLzPSUUGMWMi4HSdOkHqApl5kexcWK0fw4AziOeyZ2GMD5zhPR/HVw4M3MJVeUpENqa2JPMmlTGGYWUeecr4Pa+9xoTyck0eaJI3ADf9JiC0/xzQt/Zo7LVVBEVERERERAY6FVgiIiIiIiJ5UtApgiIi3WGti22+D5rvBNsIob0xZRdjTRDWXACx15Md/TtC1R/w+b1XabPxD7ENVyX7+4Zhys7DhA/qxTPZNNZabMtD0HR7chPl4B6Y8kswRV5uVkRE1lvS0MBVM1/i+c8+pTwU5KydJ3PyDjvy5Afv8X/P/Zc1LS2E/X6+OmkKl03fw3OM5PX+YWi+PfluVnA3TNklGH83likvEptYgW28Htr+k1zRt+R0TMnpae9SD3YFXeSiUPTisMjA5K77EbT8g/W7uztABdBK+o7vAGEYOhufL32JVhv/ELvq+OTCDO1bFZkIlF2Or7RvLzHu1l8Bzfew/lx9YMoxdf/EDLL3zHprkYtCUq4SGXhWNTdz0N13sK61lUTqZ+iI38/kkSN58fPPM/oft+32/O6AgzPa3YaroOlO0q/3ZZi6xzF9+F0t6zZgVx4C7mognmqNQPhAfFW/K2ZoRZEtV2mKoIj0CTaxFFoeJr2QSgD1ZBZXAK3QdHPmOA03gG0lbR9Y2wKNV2Otx47vfYR110DzXaSfqwu2Gdt8R5GiEhGRzu6c+zpN0WhHcQXQEo97FlcAf5//NtF4PK3NuvXQ9Gc8r/dNtxcg6vyxzQ+AW8/64gqgBVqfwsa9vweDkQosEekbYvPBdN0wEJJFVhbRVzzGeZ31q213ZiHhuTds3xD/IMv5x6B
tVq+HIyIimWYtXkhbYgN5qQsLvL96VXpj/EPwXKo9DtE+fr2PvUpyVkkXJgDxd3o9nL5KBZaI9A3OSLC5J63k14z1aMvyvpKNJzcy7Kt8I8DzCZsPBtjmviIi/dX4yiqcbu4JOLaiMr3BGQ425tHTgNPHr/fOeJIb0HflJvO4ACqwRKSPMIGtIbAVmRfuEN7bFBkouyyztex8INylNQzhw/v0RsTGPwaCk4GuT7GCmNKvFiMkERHp4qxdJhN00hdzCPh8DCst9ey/TW0dFeH0nGSckRCcRub1PoQp+1oeo80/U3IqmWvk+ZM3PP0TixFSn6QCS0T6DFN9KwT3IFlkhcA3ElN9M1TfRnrRFIKqmzxXETShPaHiJ2CqUl8TgsjhmMqf9cIZ9IypugFC+7L+/Idiqq7FBLYrdmgiIgJsVVvHzYcdxYiyckKOQ8Dn8IXxm/HUaWdwxISt0/vW1PLgCad4jmOqroXQfiSLrBD4hmCqrsIEdiz8SfSA8Y/F1NwKzhiSN0ADEJyOqbkjueGvAFpFUET6IOs2gG1OFhidLthu/CMAfP4tNj6GTYC7AkxFaqf4/sO6jcll6n3DBm3C0iqCItKXWWtZ1tRIaSBIeSjU0d4ajzN/xXLGVVZRU7Lx3LP+ej8UY/rPcw9rLbjLwUQwvopih1M02XKV9sESkT4nOZUvczpfLoVVxxjGSc5z74eMrwwoK3YYIiKShTGG4WWZeSrs97PLiNzfReqv13tjDDjDih1Gn9V/SmUREREREZE+TgWWiIiIiIhInmiKoIgU1KdLb6Mmdj0l/hZaE0GWmzPZfNTlqV3s7wDaksunl/8EX+RAzzGsbcE23gotfwcsRI7ClJ6bt3er3HW/gJb7gSj4hkLlbzDBadimv0DLvcmNikMHYsovxPhqvGNMLMY2XAvRF8FUQOmZmMgJWd+hsm0vYBuvg8RC8G+PKb8UE9i+W3Fbd01yY+W2p8GEIXIypvQMjNGlXUQkVy3RKMc+cC/vrV4JQEUoxC2HH8W4ymrO++ejvLl8GQaYPGIUtxx+VMaqgO3mr1zB719+kTeXLWVkRQXf2HU3vrjZ5nmJ0Y1/Bmu/AfH3AJNcEKrqOoy7Ett4LURngK8GU3o2hI/Mnnta/4ttvAHcJRDYCVN2aXIVX6++thXbeEuX3Pt1jM97xcRsbPRVbMM1kPgY/BMwZRdjgpO79w3oZ7TIhYgUzCeLrmes73oA2q/11kKjO5Zyx2PH98rr8UUOSmuy1sWuPjm5ETFtqdYQ+LfA1D6UfNeqB9zVX4XoC5kfBKZCbB7rN1T0J19CrvtnRnKxiZXYlYeBrWf9xsgRKDkJX8X3M4/Z8jis+z7pmzWGMTV/xQR3yilua1uSx0wsA9r3UwlDaC981X/IaYy+TItciEhv2f7Ga2mJxzPagz4fUTd94/qyYJA3zrkAny99Etg7K5Zzwt/upTUep/0n64jfz0++8EVO2L5ny5e77lpYvjvQJUZTBSTANgHtcUag9Cx85RdnjtN8P9T/kvW5xyQXqai5P6PIylfutW3PYddcREa+q74ZE9o9pzH6smy5SlMERaRghtqbMWZ9cQXJ35f5PIorgIafZrZFZ0D8fdZf4En+PvEptD3fo/hcd7V3cQUeu9XHwV2DbXk0o6ttvjOV4DpvlNwCzfdi3dXpfa2Fhl91GRugFdvwu5xjt83/gMQq1hdXyTFoewEb+yDncUREBrO/v/O2Z3EFZBRXAI3RKHfOfT2j/Xcvv5BWXAG0xOP8+qXnSXiM0y31vyWjuAKwa7sUVwAt0HRbcnXCzl1tHBp+R3rusWBbsI1XZ44dnZmX3GvTCrp2rdiGX+c8Rn+kAktECibseO1UvwHuqsy22DywbZntthnib21aYO3aXunmF7SkCq8uoq8A0cx2E4TYe+ltth7cdd7Dx9/JPZTYq8l4Mo7pg/i83McRERnEHnv/3W5/zUsLPstoe3PZMrzmhLXE4qxqad6EyDrxyjs
dPIo3E4DUtibru60A65GnsBB70+OY2XOvjeWWY6y1yYLMS/zDnMbor1RgiUjBJGx393DymNfujEy+X5QhAr4RmxLWeoGtuvsF4IzLbHbG43k5tTFwusRoSsj6+qtvaO6hOONIblDZlQFf7ksEi4gMZtsMydywfmPGVVZntI0o815q3QCVIe93tnLmG929/jaWuYS6rwo8S0DA57GliTMSTCiznQima17LwhiTmsbodUzv95kHChVYIlIwC6J70/U1T2shZgPeX1B6ZmZb+ECSu8V3LtZM8ulQ+JAexefzbwG+LPt4mBqgyxxzE8CUnJzZtfQsMoudAAR2xPjHp/c1ASg5jcxiMoIpuyDn2E3JiZCxmIUDvjoI7przOCIig9k3p+/Rrf4GuGT6bhntF03bjYg//Zoc9vs5cfsdCPl7uPBQxXeyfOAjM/cEIbgbpss+kMZEIHIMOeee8AGpvl6599DcYy89G4hkHJPSr+c+Rj+kAktECmazMTezoHVHrKXj1+LWzQgOeQmcLpsGh4/EV35JxhjGhDG194J/eyAABMG/Dab23m6vZOSp7rEudwcNRE7H1D0GwampY4bAGYup/lNG0gIwgW0w1delirVQMsbQPpjqmzwPacovg5KTSSavMJhyKL8MEzki57CNMwxTfUfqSVYoGWdgMqbmLozRpV1EJBeO43D3MSfgdFl174ydJnHTYUcSctbfaIv4/fz1mBMo93giddAWE/j+nl+gIhQi4vcTcvwcv+32/GCvfXocoy+wDVT+hmQ+SjFlUPMAVF6VvLFGGAhCeH9Mlcc7VYCp+GGqyAol+5tKqPg+JrxfZl8TxtTe1+Pca0q/BqVngYkAkeQsjrKzMSVfynmM/kirCIpIwUVj9axpfJOq0u0IBddPC3ATq1LLlG+Nz7fxKRTWXQ3WYpzavMfoxleAuxj82+Lzrb8jaN21yXnovqFZl73t6GstuMvAlGJ85Rs9prWt4K4BX13yydYmSB5zBZggxle1SWP0RVpFUER62zvLl7GipZk9R4/F6VRYvbtyBX7jY8vajeeeuOuyrKmRmnCESGDTrusb4kbfAV8pPv/66erWuqncU47xeU9V7MzallTuGZrTth75yL3WRsFdmcp3XtPb+6dsuUqbpYhIwQUDFQyr3jOj3efUQjcu2Nn2oMoHn38IkDkXvztFizEGPJ5wZe8fznxHq5uSx+zGu1siIuJpu6HeU8a3qcv9PS2/z8eo8op8hZTBF9wuo80YX7dyiTERcLpO29tA/zzkXmOCyfe6BgnNIxEREREREckTFVgiIiIiIiJ5oimCIuLJ2ii26c/Q8kByydfwoZiy8zG+wk19cK3l3rfe5I435tAQbWPf8ZtzybTdGZZl+VsRERncPl6zmqtmvMSrixcxtLSU86ZM49AJ3d2Co3tsYim28Tpoew5MBZSciSk5YaPv6crgoQJLRDzZNedDdBYdO7A3/xXb9izUPVawF1R//Oy/efjdd2iJJ3esf+idt/jPJx/xr9PPoCqc+3xxEREZ+D5Zu4aj7rublngM11pWNDfxrWeeZHFDPV+bVJg1cqy7GrvyaLDrgASwAhp/iU28h6n4UUGOKf2PpgiKSAYbewuir9JRXAEQS65S1PpUQY65tLGBh+a/3VFcAcStpaEtyt1vzi3IMUVEpP+6/pUZtKaKq3Yt8TjXvPIybZ1yST7ZprvANpIsrtobW6D5fmxiRUGOKf2PCiwRyRR7E88d320zNvpaQQ759orlBB0no70tEWfmogUFOaaIiPRfry1ZTMJjuyEDLKhfV5iDRl8BopntJgjx9wpzTOl3VGCJSCbfCDCZxU77hruFMLK8grjrZrQ7xjC+qrogxxQRkf5rdIX3O8Ex16WupKQwB/WPw/PHZxvv1jYdMrCpwBKRTKG9ki/udr1EGD+m5JiCHHLbuiFMqK0j4Es/ZtBxOGOnXQpyTBER6b/OnzqNiD99OYGQ43DQFhMK9t6uKTkD6Po
ecgAC22H8WxbkmNL/qMASkQzG+DG190JgZ5KJJATO5piavxZ0s987jjqWPcaMI+g4hBw/I8rK+ePhR7NFzabvHi8iIgPTHmPG8csvHkB1OEzE7yfkOBw2YWuu2P/Agh3TBLbCVP8BfMOBMBCE0F6Y6j8W7JjS/xjrMXe1r5syZYqdPXt2scMQGRSsuxpsHOMM7bVj1re10hSNMbysTMveDlLGmNestYVZBqyXKFeJ9I6E67K0qZGqUJjSYGFWue3KWgvuUjClBd2+RPq2bLlKy7SLyAYV8olVNhWhMBWhcK8fV0RE+h/H52NUee8WOcYYcEb06jGl/9AUQRERERERkTxRgSUiIiIiIpInmiIoIp6iiQR/ev017nv7TeIJl0MnbMWFu07POnXv83VruXrmy8xY8Dm1JSWcM3kqR261TV7eoWqNx7h59qv8/d23sRaO2WZbzp0yjZJAoMdj54tNLME2XgdtL4CvEkrOxESO0ztkIiIF9PGa1Vw18yVmL1rE0NJSzps6jUO23Cpr/yc/fJ+bXn2F5U1NTBk5ist224PNq/MzFX7+yhVcPfMl5i5dyqjyCi6aNp19x2+el7HzxbY+i236AySWQGAnTNklmED275dsGi1yISKeznz077yyaAGt8TgAQZ+P0ZWV/POULxPqsizu4oZ6Dr37LzRGo7ipDYojfj9fnzyVb0zbvUdxuNZy4t/u5e0Vy2lLJIDkMrwTaut4+MRTcXzFfxBvEyuxKw8DWw8kUq0RKDkFX8V3ixlav6VFLkRkYz5du4Yj7r2LlngM167PPZdN34OvTsq8fNw+ZzZXzXyJllRe8xlDxB/gsVNO7/F+i/NXLOf4v91HazxG+0/WEb+fn+6zH8dvt0OPxs4Xt/kBqP8l0JJqMWAimJr7MYGtixlav5UtVxX/JxMR6XPmLV/GrE7FFUDUdVna2MhTH32Q0f/m2bNojsc6iiuAlnicm2a/SmPUY8f7bpix8HPeXbWyo7gCaEsk+HjNap7//NMejZ0vtvlOsE2sL64AWqD57uQqjCIiknfXvTKDltj64gqSuefqV16mrVP+AmhLtbd0anetpTUe4/pZM3scy+9efjGtuGqP5dcvPkfCdXs8fk9ZG4eG37G+uAKwYFuwjdcUKaqBSwWWiGSYu3QJXg+3m2MxXl20KKN91uJFxD0SSMDx8dGanhUYc5cuzUiU7bG8uWxpj8bOm+grgEchaYIQe6/XwxERGQxeW7I47cZeOwMsqF+X1vb5unV4TdhOWMvsxZl5rbvmLlvqEQk0x+Ksamnu8fg95q4A2+bxgYXY3F4PZ6BTgSUiGUaWV3hOvQs5fsZWVma0j62o9ExcsUSC4aVlPY6l65REgJJAgBFl5T0aO2+ccXheTm1My/iKiBRItqXZY65LXUlJWtuQ0hJiWZ4kja7o+RLvw8u8c50BKkKhHo/fY6YSPEtAUpsmSz6pwBKRDHuPG09FKITTZYEGv8/Hcdtun9H/3Cm7Eu5SBAUdhz3GjGNYlqSTq4O33JKQ408r4AwQ9DkcNqFvzBk3pWcBXTe3DEBgIsY/vggRiYgMfOdPnUakS+4JOQ4HbbElVeFIWntVOMJBW2xJyHHS2iN+P+dPndbjWC7adbeMWMJ+PydsvwNhf/EXZDK+EogcDXRdqCqCKTu/CBENbCqwRCSD3+fjgeNPZsdhwwn6HEKOw+ZV1dx97AnUdrkrCDBpxEiuPOBgaiMRwn4/QcfhgM234NqDD+txLGF/gL+dcDLbDRlK0HEIOg5b1w3hvuNPojTYtagpDhPYFlN1LfiGkExeQQjtjam+qdihiYgMWHuOHcfP992fqnC4I/ccOmFrrtj/IM/+V+x/EIdO2Jqg4xD2+6kKh/n5vvuzx5hxPY7l4C0n8N099qY8GCLi9xNyHI7bdnt+uNc+PR47X0zFjyByFBACImDKofy7mPD+xQ5twNEqgiKyQauam4m7bk5PohKuy9KmRiqCIcoLMCViRXM
TWBhSWpr3sfPBWhfcpWDKML6eTzkZzLSKoIjkKpFahKkyHKYshxtvjdEo61pbGVZWhj/PK9HGEgmWNzVRHYn0qa1EOrNuM9g14BuKMX0zxv4iW67SPlgiskFeT6yycXy+rHPi82FISd8srNoZ4wNnZLHDEBEZVByfj1HdeI+qLBjMqRDbFAHH6VYsxWB8JUDuuV26T1MERURERERE8kQFloiIiIiISJ7kPEXQGHMscAUwlOQiXgaw1tqsz0GNMX8CDgeWW2sztrE2xuwDPAp8kmr6u7X2Z7nGJCLe3l25gqtmvsSbS5cyqqKCi3bdjX3Gb9atMRatW8cBd/2Z1tQGvw6GPx11LDsOG84Nr87kyQ/eJ+g4nDpxJ76y0y4sbWzk6pkv8fLCz6mNlPD1yVM5YqttMMZrAXe4auZL3D7nNdricepKSvjZvvtxwOZbYJv/Bs13gm2A0D6YsovAV4FtvBVaHk5+ceQoTOnZqWkOIuspV4n0D9ZaHnh7Hn96Yw71bW3sM34zLp62G8O7uf3Gz5//L39+4/WOP4+rqOTZM77Ga0sWcfWMl/lw9Sq2rKnlkum7M2XkKJ768ANunP0Ky5samTpyFJdO34PNq2s8x25sXcvNM2/n0Q+b8fksx21VwTnTzmZtm8u1r8zgf59+QkUoxFk7T+LE7Sfy3qqVXD3zJeb2IPfKwJDzIhfGmA+BI6y183Me3Ji9gUbgLxtIWt+01h6e65igF4dFNmT+iuUc/7f70naUj/j9/Gyf/Thuu4x/hlltft3vPdtHlpSysq2VaKrwivj9TBk5irnLltIYjeKmrikRf4CvT57KN6btljHG5U8/ycPvvZPRfv/Bq5hc8U/W7zTvT+7d4YyE+PtA+yaJIfBPwNT+DWOcjHGk/9vURS6Uq0T6h58+918eePstWuIxAPzGUBkO8/TpZ1ATye3m2RUvPMcfX8/8Nxby+TA+H62dNqkP+/0cu812PPzu/I5j+owh4g/w2CmnM76qOm2MWCLK0ff8mo/XhWlz/alx42xX08pnjXXUt7UR78h3fvbffEv+/fFHGbn3p/vsx/HdyL3Sv2TLVd2ZIrisOwkLwFr7PLC6O18jIj3z25dfSLvAA7TE4/z6xedJZNlksavz/vlo1s8WNzd1FFftY7+84HOaOhVXyfYYN782i8ZoNO3rW+Nxz+IKLLe82cD64gogDrYe4u+yvrgi+fvExxB9IafzkUFFuUqkj1vR3MS9b73ZUegAxK2lMRrlL3PfyHkcr+IKoM1104orSOaersd0raUlHuP6WTMzxnj2g8f4rCHUUVwlx/Xz1qoI9dHWjuIKknnw8fff7XHulYFjo1MEU9MtAGYbY+4HHqHTTzrW2r/3MIbdjDFzgcUk7xC+3cPxRAa1N5ct9dyrvSkWY3VLS05LnM9YuKBbx3St9Tym3+fjozWr2WnY+l3i31mxPMsohjdWDfVoj3m0AbYZYvMgtE+3YpWBSblKpP+Yv2IFIcdJu1kH0JZI8Mqi7uWf7vDKU661zF68KKP9jSWf0hzPXMI8Zn3eA2UZvzkWY1VLM0NLN77ViQwcubyDdUSn3zcDB3b6swV6krTmAOOstY3GmENJJsQJXh2NMecA5wCMHTu2B4cUGdiGlZaxprU1o90AFTnuTTW8tIz6traNd0zxGUPCY7pxLJFgWJeCbmxlVfbjljR5tBrAAeJd2kvANyLnGGXAU64S6SdGlpcT83iq4xjDuA3kiELxWlZ9VEUVEWcFLYn0IstvXBLWyVZjZehO7pWBY6NTBK21Z1przwRua/99p7bbe3Jwa229tbYx9fsngIAxpi5L31ustVOstVOGDBnSk8OKDGgX7robEX/6vZOw38+J2+9AyJ/bujZ3HXti1s+CpC9a4RhDTaSEsJP+LlTIcdhjzLiMF5brSkrYvMtc93ZnbbOEzPs+ITAlkHZcAyYA4UM2ciYyWChXifQfW9bUsm3dEAJdNvkNOA5n7jI553F2HDo
s62dd82DE72fbuiGEnMz286dMy/j6I7Y7moDP0vm5lMGlNBAn2CXfBXw+xldVeebe47fbnrBfm/kONt15B+v6HNtyZowZblJLjBljdk3Fs6onY4oMdodO2Irv7LE35cEgEb+fkONw7Dbb8YO99sl5jLqSEi6fvntG+4GbbcG9J5zMuMoqQo5D0Oew8/ARPHzSqVx54CHUhCOE/X6CjsN+m23BtQcf5jn+IyefnlFkHTFha46efA0EdwMCQAh8wzDVN2JqHwD/NkAw+cu/FabmHoyvb288LEWhXCXSD9x+5DHsOXYcQcch7PczrLSUGw89kq1rPe9deHrk5NMZEs5cEOPJU77E1yZNIeL3E/EHiPgDfHWXKTx4wskcMmECQcch4vdTFQrz8333Z8+x4zLGqIjUct8x+zOhsomgL07Ql2C76iYeOv4IbjrsKIaVlnbkuz3HjuOhE0/1zL0/3HvfHn2fpH/a6CqCxpjdgN2BS4CrO31UARxjrd1pA197L7APUAcsA/6P5E9OWGtvNsZcCJxHcu5PC3CZtfbljQWtlZlENi6WSLC8qYnqSISSwKbfPXt0/jusaWnhtIk7EkiNY61lWVMjQcdJW+0p4bosbWykPBTKaUrEiqZGPl+3jm3rhlASDHa0W3cd2CbwjUhb5t0mVgIW4+jJwEDX3VUElatE+qd1ra00xqKMKCvHl2Vbj41Z2dzM/W/NZeqoMew6anRHe1s8zormJoaUlKbN4GiMRlnX2sqwsjL8vo0/a1hW/yk+4zCkfExHm2stSxobKAsEqQyHO9pjiQTLmhqpiZT0KPdK/5AtV+VSYH2BZOI5F7i500cNwGPW2g/yGGdOlLRERAa2TSiwlKtERKRXZctVG30hw1r7HPCcMeYOa+1nBYlORESkB5SrRESkr8hlmfbHSL3hZzwe3Vprj8x/WCIiIrlTrhIRkb4ilyXFrkz991hgOHBX6s+nAJ8WICaRQSvhutw1by53vfkGzbEYB26xJRftOj3nXe03heu6/N9z/+XBd94i5rqMqajgygMOYfLIUZ7917W2csOrM3nyw/cJOX5O2WFHzth5EnMWL+Zb/36KRQ31BHwOJ+8wkR/ttQ9vrVjO1TNfYv7KFWxWVc3F03Zn+ugxnmP3JdYmsM13Q/M9YFsgfCCm7HyMz3sFRCk65SqRXrKovp6rZ77ESws+pyYS4exJUzlq6208b27ky8drVnHhE4/z/upVOMaw/+ZbcPWBhxLMsjrua0sWcc3Ml/lg9Som1NRyyfTdmTxiFFe89Dx/mfs6bYkEw0rL+M3+B7L76LHcPW8uf03l3gO22JJvFDj35ouNL8Q2XgfRGeCrxZR+DcKHFfTvQjZuo+9gdXQ05nlr7d4ba+sNmtcuA9WlTz/Bvz76gJbUDvQBn48hJaU8ffoZlHZaBCKfTvzbfcxekrnJ4j9OPp0duiyB2xqPcejdf2FxY0PHBpFhv58dhw5jlsdGjdvXDeXjtas7zqe9/3UHH8b+m2+Z5zPJL3ftpdD6X5JrGgAEwBmGqX0c4+v7Sbe/6+47WJ2+TrlKpICWNTZy8N130hht69j/MOL387VJU7h0+h4FO+aef74lY7/FMRWVPHfG1zL6v7TgM85+7BFau+Se7eqGMGfpkoz+e4wZy5wlizNy71Onn0FZgXJvPtjEUuzKw8E2Aql9xUwESs/GV3ZhUWMbLLLlqu4s0z7EGLN5pwE3A7SUl0iefLp2DU99+H5aMRJzXda0tvD3+W8X5JgL69d5FlcA3/vPvzLaHn//PZY3NXUUVwCt8bhncQXw9srlaefT3v9nzz/bg6gLz8Y/htZ/s764AohBYhW25dFihSW5Ua4SKaBb57xKcyyaVuy0xOPc8tqr3dqgvjt+9vx/PTezX1C/jpcXfJ7R/vPnnk0rriCZe7yKK4CXFnzumXsffOetHkZeWLbxVrDNdBRXkJxx0XgL1m0sWlzSvQLrUuB/xpj/GWP+BzxLcjlcEcm
DecuXeS4X2xKPM2PhgoIc83+ffpL1s4/WrM5om7VoIc3xWI+Pu7ihgbYuya9Pic0D4zXtpAVis3o9HOkW5SqRApq5aCEx181oDzoOH6xeWZBjvrZ4cdbP/v3xhxltH6zu+TZ1LfE4ryxa2ONxCio2i+TuEV0YP8Q/6vVwZL1c3sECwFr7lDFmArBNqulda21hblWIDEIjysrxmrDbvkN8IWxbl/3GflUonNE2trKSkOPQ1ukJ1qYo8QcIOk6PxigoZ2SWD4LgZG5IKX2HcpVIYY2tqGT+iuUZ+SqaSDC8rLwgxxxRXs7y5ibPz7asqc1oq45EWN3S4tE7dwGfj3GVVT0ao+Cc0RB/H7r+bdgYOMM8v0R6x0afYBljvpj677HAYcAWqV+HpdpEJA8mjxjJiLJynC4vpvp9Pk6dmHWP1J4dc+Qoz0IK4NLpu2e0nbj9RJwuT9kcY6jMsqlwid9PpMsLyBG/nzN3mdS3X8ANTAbfEKBLEWgcTMlJRQlJNky5SqR3nDN5KuEu1/Wg47DrqNGMKq8oyDG/v+cXPNsDPh8nbz8xo/3rk6d65p7yLO9T1UYinrn3tALl3nwxpecAXfNvEILTMM7wYoQkKblMEWz/v/oIj1+HFygukUHHGMPdx57A1JGjCToOIcfPqPIK/nTksYyuqCzYcf956pcZWlK6Pg7g7F0mc4JH0hpaWsZfjz6+40lW0Oew0/AR/PPUL3PmTpPonJ6Gl5bx7y+dxflTp1ESCFDiDxD2+zlt4k58Y9fdCnY++WCMD1PzVwhMAoJACJzRmOrbMM6IYocn3pSrRHrBzsNH8PsDDqE2EiHs9xN0HPYZtxk3HHJEwY45ddRofrTXPmlFUEUwxKMnnY7PY2r913aZwlm7TCbi91PiDyRv7O08mf995auM7lQEGuCk7XbgiVO/0pF7w/71uXdMZeFybz6Y4C5QeQWYaiACBCG0L6bqmiJHJjmvItiXaGUmGehWtzTTGo8zoqy81570LKxfx5KGBnYaNjzrsrftrLUsbWwk6DjUlqxfUS8ajzN32VJGllcwqmJ9EmuLx1ne1ERdSQmRQKBg51AI1l0Ntg18w/v2U7cBZlNXEexLlKtkIEu4LksaG6gIhajIMhMi31zXZe6ypVSGQ2xenTk1sKvWeIwVTc0MKS0h7F+fe5Y0NLCwfh0Thw1Pexq3uqWZlnickb2Ye/PB2gQkloCvEuMrzDRN8ZYtV+X8DpYx5iNgJvAC8Ly19p08xicinRRj743RFZU5PykzxjCiPPMiHvT7mTpqdEZ7yO/v83cCszG+mmKHIN2gXCXSOxyfr6CzK7z4fD52GZHtHdlMYX/AM/eMKC/3zGH9Yd8rL8Y44M/MvVI83VlFcDvgj0AtcKUx5mNjzMOFCUtERGSTKFeJiEhRdafASgCx1H9dYBmwvBBBiYiIbCLlKhERKaqcpwgC9cA84CrgVmttzzcZEBlAPlm7hqtnvMTsxYsYWlbG+VN25cAtJuRl7M/WruHUvz/AksbkxoFjKyp56MRTWNLYyKVPP8Ena9fg9/k4cqtt+M1+B/LMJx/x42f/zaqWFiJ+P+dO2ZULpk7n1jmzuWHWDJpiMarDYX60974csdU2/OP9d7nltVdZ09rCHmPGccn03RlaUsqf35jD/W/PI5pIcPhWW3PelGmA5Q+zXuGJD98n5DicMnFHvrLTJM89vESKQLlKJAtrLY9/8B63vPYqq5qb2W3MWC6dvnvepvrd+OpMrn1lBjHXxWcMJ28/kV988QBumDWDP772Ki3xOHWREn66734csNkWfPffT/OPD94j7rpsVlXN1QcdyrjKSs5/4jFeWbgQi2WX4SO48bCj8Bm4ftZMnvnoQyKBAF/acWdOm7gTn9ev45qZL/PqooVpuXfOksVc88rLfLBqFVvW1HDp9D2Y1I3phSI9kfMiF8aYo4A9gV2BKPAyyfnt/ylceN704rD0NZ+uXcOR991FcyyGm/o3FfH7+db
ue3HGzpN6NHZjNMpON1+fseeIz5iOY3U2uryChQ31Ge1b19Tynsfmi/tttjkvd9rF3jGG8mCI7YcO5bUli2lNtQd9DmMqK4i7LksaG4mm9sIK+/3sPW48Nx92VI/OU6SzTV3kQrlKJLvrZ83g5tmzOq73PmMoDwZ54tSveL6T1B1XvvwCN87O3Ih9RGkZS5oaM9pHlZezqKEho700EKAplr6hfcjxUx0Ksaq1pWOT44jfz55jxzNj4ecZufe4bbfnwflvd+QvSOaqW484mj3GaC9DyZ9suSrnW87W2kettd8Cvg48AZwBPJ63CEX6setemUFLpws8JHeB//2MF2mLe+yy3g3ffuYpzw2IvYorwLO4AjyLK4D/fPJxR7IFSFhLYzTKKwsXpiWnqJtgQX09SxrWF1cArfE4z3/2Ke+uXJHD2YgUlnKViLfGaJSbOhVXkMwjzbEYt8x5tcfj3/ya9xhexRXgWVwBGcUVQFsizsqW5o7iCpI59j8ff0hzNDP33j1vblr+gmSu+vnzz270PETyIecCyxjzUGp1pmuBUuDLQHWhAhPpT2YvWUQiW8FTv65HY89ZsqhHX78p4tYlYd2M9mgiQdRNZLQb4M1lS3shMpENU64S8fbh6lWeU7ljrsvMhQt6PH62m375EvcY3wKuxy3IbJF8sEozhqV3dOcdrN8Ac6y1mT9dAcaYA6y1z+QnLJH+ZVR5BQvrM58cxVw3bZ+oTTG0tIzlzc09GmNT+IzJKBrbN3ns2u4zPkaUae8N6ROUq0Q8DCstI5bwvkE2th9so2HILJyMMXRnP9fqSCSvMYlk050pgq9mS1gpV+QhHpF+6fwp04h02Zw35DgctMWWVIV7dkG/Yv+DutU/22ITIcfxbB9SUkKwy2dhx6EqHMHXZaPFkN+f0dcxhqpwmN3HjO1WnCKFoFwl4m1EeTnTR4/NvN77/ZwzeWqPx580fIRne7YfNLu7MFKwS3/HGIaVlnnm3m1qh2S0R/x+zpnU8/MUyUU+l/3qP1tei+TZXuPG85MvfJHKUIiIP0DQcTh4y626XRx52XbIUL6z+15pbQa4Yv8D+cauu+Hr9E+vOhzhmdPP4KAttkzrv0VVNS+ceQ7b1NWltX9h3Hj+ddoZfGHceIKOQ8TvpzYS4aoDD+Xhk05l4tBhBB2HkOMwrrKKu489kb8ecwJjKyoJOclia8dhw7nv+JNwtIqg9A/KVTJoXX/I4ew7frOO631NOMJv9z+YySNG9Xjs+487ifGVVWlt1eEwL531dbaoSp+le9AWW/LM6WdQ3ekGpA/DN3bdjTuOOpaQs744Cvh8XH/wYfz56OMZWV5OOHWjb8rIUTx80qn8dJ/90nPvFhN48ISTOXPnyUT8fkr8ASJ+P2fuPImvTer2ujkimyTnVQQ3OpAxc6y1PVsuLUdamUn6qrjrsqShgapwmPJQKO/j/++Tjwk6fnYfu/5pUdx1eX3JYoaWljKuUxJrjEZ5Z/lyNquuYkhpWUf7yuZmPl69mm2H1FEeCne0r2ttpb6tjZHl5WnF0ormJmKJBCPKyjGpJ1rWWpY0NhB0/NT1cAqkiJdNXUUwh3GVq2TQq29rZV1r5vU+H1Y1N/Higs+YNHwUYzpNPVzW2Mgna1ezw9DhlAWDHe2frV3D8qYmdhkxMu2p1jsrlpFwLROHDe9os9ayuLGBsONPm37fnnsrw2EqOuXe1niM5U1NDC0tJewP5PU8RSB7rlKBJSIifY4KLBER6et6vEx7Dj7N41giIiKF8GmxAxARkYFto6sIGmOO3dDn1tq/p/67wX4iIiKFolwlIiJ9RS7LtB+xgc8s8Pc8xSIy6CVcl3veepO/vvkGzbEoB20+gQt3nU7I7+fm2bN49L35OMZw/HY78NVdJhPye/8Tfm/VSq6e8RJvLlvK6IpKLtp1OnuNG+/Z11rLY++/yy1zZrOmpYU9x47lG9N2Z1R5RbdiX1i/jmtmvszLCz+nLlLC1yfvymF
bbd3db4HIplKuEuklixrquXbmy7y04HNqIhHOnjSFI7bahndWLOeaV2bw9vJljKuq4uJpuzN99BjPMay1PDT/bf70+musa2tjn/GbcfG03Rja6Z3hzla3NPOHWa/w9McfUBII8qUdd+a0iTtlrHa7Mf/66ANumj2LZU2NTB05mkun7874Km2VJ/mVt3ewepPmtctAdfm/nuSpD9+nJbUDfcDnY0hJKRXhMJ+sWU1bag+TsN/PzsNHcPcxJ3QsPNFu/soVnPC3e2mJxTr2DAn7/fzyiwdwzDbbZRzz6pkvc9ucVzuO6RhDeSjEU6d9JWui62pJQwOH3nMnDdFox2aTEb+f86dO44Kp0zflWyGDXKHewepNylUyEC1rbOSQe+6koa2tY0/EiN/PEVttw2Pvv0trPJ6We64+8FAO2nJCxji/eP5Z7n3rzY7c4/f5qAqHefq0MzL2q2qKRjn47jtZ3tRIzHU7jnnIlltx5YGH5Bz7n9+Yw5Uvv9BxTJ8xlAQCPHbylxhXVdXN74RInt7BMsYcZoz5tjHmx+2/8heiyOD22dq1PPHBex0XfkhuVLyyuYmPV68vrgBa43HeXLaU2UsWZYxz5csvpBVX7f1/+cL/OoqfdvVtbdzy2qy0YyaspSka5dY5uf9g+MfXZtEUi6WN3xKP84dXX6EpGs15HJF8UK4SKZzbXp9NUzSatuF8SzzO3955i5ZOxRUkc8/PX3g2YzPglc3N3D1vblruibsuDW1t/PXNNzKO+dD8t1nd0txRXLUf858fvMfn69bmFHdbPM5VM15MO6ZrLc2xGNfPmpHTGCK5yrnAMsbcDJwEXERyH5ETgHEFiktk0Jm3fKnnxotR1yXqZu6bGkskmLt0aUb7G0uXZOx2D9AUjbGquTmt7YPVKzM2nYRkYffKwgU5xz5z0ULinRJfO7/Px4drVuc8jkhPKVeJFNbMhQvSCp122eZDLWtsojkWS2ubv2K5Z+5pSySYsfDzjPYZCxekFUbt/D4fby7LzINeFtSv82x3reXVxZk3K0V6ojtPsHa31n4ZWGOt/SmwG+A9sVZEum14WblngvIZ41l4BR2HEWXlGe3ZpvUZSNsfBGBYaRnRRGaiNMDoisqM9mzGVHi/rxVLJBhWWprzOCJ5oFwlUkBjKiq7tVt3yO8Q7vK+8PCycs+bco4xjK3MzD3jKisJZNmva0R5Zh70Uhsp8SwMgW6/cyyyMd0psFpS/202xowEYsBm+Q9JZHCaPGIkI8rKcbq8UxX0+dJ2tYdkART2+9l/8y0yxrlo1+lEuiSzsN/Pcdttn7EoxuiKSqaMHEnAl34nMeT38/XJU3OO/euTd804ZtBxmD56LMM9ikCRAlKuEimgsydNycglQcdhy+oaz9zzpR13ztjMeEJtLVvXDckomoKOw5k7T8445mkTd8640egYw/CyciYNH5lT3NWRCAdsvgWhLk/OIn4/503dNacxRHLVnQLrcWNMFfA7YA7JvUTuK0BMIoOSMYa7jjmBySNGEXSSd/xGlpfzp6OO4/7jT2Lz6mpCjkPQcdimbgj3H3+y5yqCh07Ymm/uvhelgSAl/gAhx+GorbflR3vv63ncPxx6JF8YP56g4xDx+6kJR/j9AQez0/AROcc+ZeQofrPfQVSFw0T8AYKOw77jN+P6Qw7f5O+HyCZSrhIpoF1GjOTKAw6mJhwh4vcTdBz2HjeeB084hQumTqckEKAkECDk+Dlp+4lcvtuenuPcfsQx7DZmbEfuGVJSyvWHHME2dUMy+o6prOS2I45hRFk54dQxJ48Yxd3HZi70tCG/3f9gDtxiQuqYASpCIX7yhS+y19jxm/rtEPGU8yqCxpiQtbat/fdAGGhtb+tNWplJBrpVzc20xuOMLC9PSx5LGhpwfCan1f2iiQRLGxuoiZRQFgxutP+61lbWtbUyqrwi425jrhKuy6KGeipDYSrD4U0aQwQ2fRVB5SqR3pHtet8Wj7O0sZEhpaWUBAI
bHWdNSwuN0SijKio2uuS6tZbFDQ2E/X5qS0o2Ofb6tjbWtrYwoqycgMe7YCK5yparctkHq90MYBJAKlG1GWPmtLeJSP5kSxy5zjWH5FSLsZVVOfevDPe8KHJ8vm4dU6QAlKtEekG2633I7+/WkufVkUjGsuzZGGMYleWd3+6oCIUy3kkWyaeNFljGmOHAKCBijNkFOt5trAA2/faBiIhInihXiYhIX5HLE6yDgDOA0cBVndrrge8XICYREZHuUq4SEZE+YaMFlrX2TuBOY8xx1tqHeiEmkZwtaqjnuldm8OLnn1FbUsI5k6Zw2IStu/XSa7E889GH3Dj7FZY3NbHrqNFcMm13xlRWcs+8ufz1zTdojsU4aIsJXDB1Ws7TJ0QGK+Uq6cv+8/FH3Dj7FZY2NjJl5Cgumb47m1VVFzusjWpoa+Pm12bx2PvvEnQcTt5+R76y0y6saG7i2vbcG4lw9uSpHN5Pcq9Ib+jOIhfDgV8CI621hxhjtgN2s9beXsgAvejFYQFY1tjIIXffSUO0rWNH+Yg/wNcnT+Ub03YrcnQb9uc35nDlyy90bJzoGENJIMBuo8fywuefdrQHfD6Glpbx1GlfoTSHhSpEBooeLHKhXCV9yl1vvsGvX3yu47ruM4aIP8A/Tjm9TxdZbfE4R9z7Vz6vX0c0kdzsPuL3M2XEKOatWEZDW+fc60/l3t2LGbJIr8uWq7qzVNifgaeB9g0H3gcu6XloIpvm1jmv0hSLdlzgAVriMW6aPYuGtl5fMCxnbfE4V814MW1X+oS1NEVj/PvjD9PaY67L6pZmHn73nWKEKtIfKVdJnxFNJPhtp5tpAK61tMZjXPfKjCJGtnFPffQBixsbOoorgJZ4nBkLF9DYqbhqb79p9izq+3DuFelN3Smw6qy1DwAugLU2DiQ2/CUihTNz0ULPXdkDjo8PV68qQkS5+XzdOs92F4vX8+SWeJyXF3xe2KBEBg7lKukzFjXU43rMFEpYy+zFC4sQUe5eXbSQ5lgsoz1hXeIe5xR0HD5YvbI3QhPp87pTYDUZY2oh+TOgMWY64P2TokgvGFNRgdds71giwdCyje8TVSx1JSWehSHgOX894PN1a8lbkUFOuUr6jNpIhHiW6/2Isp4vN15IoysqCXnsEZVtr6poIsHw0ty3EhEZyLpTYF0G/APY3BjzEvAX4KKCRCWSg7MnTSXsT1+nJeg47DpqNKPK+27iqo5E2G+zzTMSV8TvZ2hJKU6X5OX3+Th1h516M0SR/ky5SvqMilCYg7eYQMhJz1URv5/zp04rUlS5OW677fF32XTeZwyV4XBG/go6DlNGjsrLHlUiA0F3Cqx3gIeBV4FlwK0k57aLFMWkESP57f4HUx2OEPEHCDoOe48bz/WHHFHs0DbqygMO4YDNtyToOJT4A1SEQvzkC1/k4ZNOY5cRIwk6DmG/nxFl5dx2xDGMqawsdsgi/YVylfQpv9n/QA7eYv31vjwY4od778s+4zcrdmgbNKSklL8ecwJjKyoJ+/0EHYcdhgzl4RNP4/cHHpKWe/caO54/HHpksUMW6TO6s4rgAyT3E7k71XQKUG2tPaFAsWWllZmks4TrsrC+nspwiKpw/1rOvL6tlTUtrYwsLyfQ6Y7gyuZmWuMxRpVXaNlbGZR6sIqgcpX0SfVtbaxpacm43vd11loWNzQQcJKr2rbrz7lXJF+y5apcNhput7W1tvM8pWeNMXN7HppIzzj9+B2lilCYilA4o72upKQI0YgMCMpV0idVhEJUhELFDqPbjDGeU//6c+4VKbTuTBF8PfWyMADGmGnAS/kPSUREZJMpV4mISFF15wnWNODLxpj29aLHAvONMfMAa63dMe/RiYiIdI9ylYiIFFV3CqyDuzu4MeZPwOHAcmvtDh6fG+Ba4FCgGTjDWjunu8cR6QnXdfnhs//m4XffIe66bFFdwzUHHcY2Q4Z49v9kzRq+8dRjvLtyJY7xcfCWE7jywEMyVltq98b
SJVwz82U+WL2KrWpruWTa7uw0fES3YmyOxbjltVd59L35+IzhxO124MxdJrO2tYXrZ83kf59+TEUozNd2mcLR22yr97ZkMFOukgHp3ZUruOTpJ/ho9Sr8Ph9Hbb0tv/riAfiy5J47577ONTNfoiEapSYc4Yd778ORW2/r2Tfuutz15hvcM28ubYkEh03YinOnTOv2lMb5K5ZzzSsv89by5YyvquLiabuz66jRvPD5p9wwayYL6+vZefhwLpm2BxNqa7v9PRDpL3Je5GKTBjdmb6AR+EuWpHUoyeVzDyV51/Faa+1G1y3Vi8OST0fc+1feXrE8rc0A//7SWWxWXZ3WvqyxkT3/fEvaDvYA4yur+O9Xvpox9ssLPudrjz1Mazze0Rb2+7n9iGPYbczYnOJLuC5H3383H65eRVsiuV9q2PEzcegwPlm3hrWtrR37rET8fk6buBPf32ufnMYW6as2dZGLTTyWcpX0aQvWrWXfO2+n645a29TV8cSpX8nof9XMl7hh1syM9l9/8QBO2iHzIe55/3yU5z/7lJZUrgr6HEZXVvDPU75MyJ/bvfi5y5Zy6kP30xqP054hw34/p2w/kXvfnteRB33GEPb7efCEU9imzvtGpkh/kS1XdecdrG6z1j4PrN5Al6NIJjRrrZ0JVBljundrX6QH3l25IqO4guQOpT969pmM9p89/9+M4grg03VrmbVwYWb/5/6bVlwBtMbj/OKF/+Uc47Offswna9d0FFcArYk4ry9bwrpOxRVASzzOX998g5XNzTmPLzLYKVdJX/f9/z6TUVwBvLtyJe+uWJHW5rouN736iuc4XrnnvVUrea5TcQUQdRMsbWzkiQ9y3+Hg1y88R0un4gqS+e7Oua+n5UHXWlpiMX4/Q69GysBV0AIrB6OABZ3+vDDVJtIr/vPJR1k/e2v5soy22YsXZ+3/9EcfpP3ZWssHq1d59n1v1cocI4Q5S5bQHItltCdcl5ibmXKDjsP8lZlFo4hsMuUqKSqvfNTu6Y/Tc09jNOp5IxCgySOXvLF0CV6TyptjMWYuWuDxSZYYV3jH6FUYWmDOkuz5VKS/K3aB5fVv2vOqYIw5xxgz2xgze0WXuzUim2rLmuxzwGsimUulDystzdp/i5qatD8bY6gKZy7BDmRt9zKivJyIxxQNxxjPf0Bx12V4aXnO44vIRilXSVHVeuSjdhNq6tL+XBIMZu3reLyfO7y0DJ9He9BxGOOxPPumxOhlQ/lUpL8rdoG1EBjT6c+jAc9bGtbaW6y1U6y1U4ZkWXxApLsO2mKCZ/EC8K099spo++6eX/DsG/D5OHn7iRntZ0+amjF+xO/n7ElTc47xyK22wenyErMBSoPBjLnxAZ+PreuG6OVhkfxSrpKi+rZHPoLkO06HTtgqrc3v8zFp+EjP/oduuVVG255jx1EZCmcUWX6fjxM98lo250+d5pnvtq0bQtij/YKp0xEZqIpdYP2D5HK6JrVvyTpr7ZIixySDzGOnnE5lp5WSDHDhrtM5xCMR7T5mLN/dY6+0RFQWDPLQiad6ruR0zuSpfHmnXQj7/ZQEAoT9fr680y6cPSn3d/crw2HuPfZENquqJuT4CTkOW9cN4aETT+W6gw+jNlJCxB8g6DhMHz2G2484pnvfABHZGOUqKaoDt5jAxdN2S3uUWhkK8Y+TTvfsf8+xJ7BNXfqTremjRnP1QYdm9HV8Pu47/iQmDh1G0HEI+/2MrqjgzqOPY2hpWc4xnrjdDqkiK0BJIEDIcTh+ux342wmncMRW2xByHEoCAUoDQS6bvgeHbbV1zmOL9DeFXkXwXmAfoA5YBvwfEACw1t6cWvr2BpLL6jYDZ1prN7rkklZmkkL4eM0qljY2seuo0VmXXG/nui6zFy+mIhzKaRWklliMpU2NDC8tIxIIbFJ81loWNzbgGMPwsvVTAF1rWbBuHeWhoOe0RpH+qJdXEVSukn4h7rrMXryQukgpW+YwU2FFUyPvrlrJxKH
DqApHcujfRFsizqjyik3e7qMtHmdxYwNDS0op7TRdsaGtjZUtzYwsK895ZUKRvi5bripogVUoSloiIgNbbxZYhaJcJSIysBVlmXYREREREZHBRAWWiIiIiIhInqjAEhERERERyRMVWCIiIiIiInmiAktERERERCRPVGCJiIiIiIjkiQosERERERGRPFGBJSIiIiIikicqsERERERERPJEBZaIiIiIiEieqMASERERERHJExVYIiIiIiIieaICS0REREREJE9UYImIiIiIiOSJCiwREREREZE8UYElIiIiIiKSJyqwRERERERE8kQFloiIiIiISJ6owBIREREREckTFVgiIiIiIiJ5ogJLREREREQkT1RgiYiIiIiI5IkKLBERERERkTxRgSUiIiIiIpInKrBERERERETyRAWWiIiIiIhInqjAEhERERERyRMVWCIiIiIiInmiAktERERERCRPVGCJiIiIiIjkiQosERERERGRPFGBJSIiIiIikicqsERERERERPJEBZaIiIiIiEieqMASERERERHJE3+xAxisrLU8e99LPHT1Y9SvamT6YZM55fvHUDO8utihiYiIALBuZT33/vrvvPzobEorSzj24sPY/0t7Y4wpdmgiIn2WCqwi+dMP7uGR65+ktakNgMf/+C+ee3AGt827iora8iJHJyIig13TuibOm/xt1ixbRzwaB+C6C27l/dc+4oJrzypydCIifZemCBbBupX1/P2af3YUVwDxWIKmtU08+oenihiZiIhI0j9v/Q/1Kxs6iiuA1qY2/nnLv1m5eHURIxMR6dtUYBXBR298SiAUyGiPtsaY8+83ixCRiIhIutf/8yZtLdGM9kDIz/uzPypCRCIi/YMKrCKoHVVDPBbPaPf5DMM3G1qEiERERNINHz8Un5P5Y4KbcKkbVVOEiERE+gcVWEUwbtvRbLbDWJyAk9YeCAc59uLDihSViIjIekdfdAiBUPqr2o7fx4jNhzFh0uZFikpEpO9TgVUkv3j8e+y493YEQgHCZWEq6sr5zl8uUtISEZE+Ydx2Y/jR/ZdRPayScGmIQCjAdrtvw2+e/qFWERQR2QCtIlgklXUV/PaZH7Nm2Voa1zYxcsvhOI6z8S8UERHpJdMOm8x9i25h8YdLKamIaCsREZEcqMAqsuphVVQPqyp2GCIiIp58Ph+jtxpZ7DBERPoNTREUERERERHJExVYIiIiIiIieVLwAssYc7Ax5j1jzIfGmO96fL6PMWadMeaN1K8fFzqmviwei/PQ1Y/xtR0u5axtL+aeXz1Ea3Pbxr9QREQ2ifJU9y36cAlXfOV6vrT5BXzziz/htWfmFjskEZE+o6DvYBljHOAPwAHAQuBVY8w/rLXvdOn6grX28ELG0h9Ya/nxUVfw5vPv0Nac3Nzx7l8+xEuPvMp1M36pRTBERPJMear7Fr6/mAumfpfW5jbchMvST5fz7qwPueiGr3LQGfsWOzwRkaIr9BOsXYEPrbUfW2ujwH3AUQU+Zr81/5UPmPfC/I7iCiDaEmPBu4t45Z9zihiZiMiApTzVTXf8+H5am1pxE25HW1tzGzdffieJeKKIkYmI9A2FLrBGAQs6/Xlhqq2r3Ywxc40xTxpjti9wTH3W/BnvE49lJqeWxlbeevHdIkQkIjLgKU9101svvovr2oz2eFucFQtXFSEiEZG+pdAFltdOhF2vynOAcdbanYDrgUc8BzLmHGPMbGPM7BUrVuQ3yj6idmQ1gVDmrM1QJMjQsXVFiEhEZMDLW56CwZOrvCQSLuU1Zb0cjYhI31PoAmshMKbTn0cDizt3sNbWW2sbU79/AggYYzKqCWvtLdbaKdbaKUOGDClkzEWz+1FTCYQCmC7p3vE7fPHUPYsTlIjIwJa3PJX6fMDnqlO+dwyhklBaWzAcYO/jp1NaUVKkqERE+o5CF1ivAhOMMZsZY4LAycA/Oncwxgw3JllSGGN2TcU0KOcYBMNBrnruZ4zdbgzBcIB
QSZARmw/jt//+MRU15cUOT0RkIFKe6qY9j5nGWb86hUhZmEhZmEAowB7HTOPSW75e7NBERPqEgq4iaK2NG2MuBJ4GHOBP1tq3jTHnpj6/GTgeOM8YEwdagJOttZmTuweJcduO5rZ5V7HssxUk4glGbD4M0/WRloiI5IXy1KY59huHcfg5B7Dkk+VUD6vUTUARkU5Mf8wRU6ZMsbNnzy52GCIiUiDGmNestVOKHUdPKFeJiAxs2XJVwTcaFhERERERGSxUYImIiIiIiORJQd/BGqhWLVnDPb/6O7Ofep3KIRWccPmR7HXc9Kz9b7jodh7/4zMk4glCJSHOu+YrHPrV/XnugZd58OrHaVjVwLTDJnHK946lvKaMf9z4NE/e9h8SCZf9v7Q3x158GOEuKza1W/bZCu76xYPMffZtakdVc/J3jmHaoZMKdeoiItJPvPbMXO799cMs/3wlO+y5Daf/6HhGbjHcs+/aFeu4bO8fs+C9xWBgzNajuOq5n2KM4f7fPsJLj7xKWWUJR3/jUPY/fW8Wf7SUu3/xEPNemM/w8UM55XvHMGn/HbPG8vI/XuWB3/2D1UvWMGn/iZz6g+MYOkbbj4jIwKR3sLppzfJ1nDPxMhrWNpFIbQocLg1x0reP4vQfnZDR/weH/4pZT7ye0b7LfhOZP/N9WpvaAPAHHMpryxm/3Wjemfk+bc1RAIKRAJvtMJZrX/4ljuOkjbH88xV8fZdv0dLQQiLuAhAqCXHOb0/nyPMPzut5i4j0Jr2D1TNP3/Es1194O23NyRzjc3yES0PcOPsKRm05Iq1vIpHg0PApuIn0nwd8jo/q4ZXUr2ggFo0DyXy313HTeemRWbQ2teEm1ueei/7wVQ76yr4ZsTx41WPc8eP7O2Jx/A4lFRFumXsldaNq837uIiK9Re9g5cnfr3mcpnXNHcUVQGtTG/f++hGa1jWl9Y1Go57FFcDr/5nXUVwBxGMJGlY38uYL8zuKK4BoS4zP5y/yHOfuX/49rbgCaGtu47bv3UO0LbbJ5ygiIv1XIp7g5svv7ChoANyES2tjK3/5yQMZ/a87/9aM4qr9a9YsXdtRXEEy3/37rudpaWjpKK4gmXv+ePlfSMQTaWO0NrelFVft8TU3tHD/FY/26DxFRPoqFVjdNOff89KSTbtAyM8n8z5Pa3t35ofdGjsejeN2SU4ALY2tzHthfkb7G8++lVZcdbbogyXdOraIiAwMyz9fSdwjT7mu5c3n3slon/PveVnH8iq8rLV4TX6JtcZYsTB9e7AF7y7C8Wf+qJGIJXj9v9mPKyLSn6nA6qZh4+rw2pYqFo1TO7ImrW30ViMyO26AMQafk/lXEooEGTo2c676kNHeUysSsThVQyu7dWwRERkYKmrLst58qx1Vk9k2srpb42fbmzGRcCmvKUtrqx5W6XlTEmCIR14TERkIVGB10/GXH0kwEkxr8wcctp6yBSM2H5bWXjO8mtKqEs9xSsoj+APp71QFwwHCZeGMAs7xO3zx1D0zxjj5u8cQ6rL4RSAUYMpBO1OtAktEZFAqrSxlz2OnEQwH0trDJSFO/d6xGf0vv/38rGMFQulrYTl+H0NG12bkwWA4wN7HT6e0Ij3n1Y2qZacvbJcxTqgkxEnfOiqn8xER6W9UYHXTdtO34rJbz6WsupRwWZhAKMBO+2zPTx7+lmf/O967jkhZOK1t5JbD+fP717LDntsSCCWLqoraMr5954Vc+9IvGbvtaIKRIKFIkOGbDeW3//4xFTXlGWNPOXAnzr3qK5SUR4ikYtn1kF347l8vKsi5i4hI/3DZreey25FTCIQCRMrDRMrCnPmrU9j9qKkZfcdsNZKv/vrUjPazrzidH//tm1QOqSBcGiIQCrDt9K24fuavOOuXpxApCxMpDxMIB9j96F259Jave8byw/suZdIBOyVjKQtTWlnChdefxc777pD38xYR6Qu0iuAmisfiLP5oGeXVpVQPq9po/w/f+IR3ZrzPbkd
OYUinVZNWL11D49pmRm05HMe//onW0k+X4yZcRmw+LOt0jHaxaIzFHy2jakgFlXUVm3xOIiJ9hVYRzI/6VQ2sWb6OEZsNJRgObrBvIpHguftfxjiGvY/frWPl2kQiweIPlxIpj1DXaSp8tDXKkk+WUz2s0vMmYFdrV6xj3coGRm4xjEAwsNH+IiJ9XbZcpQJLRET6HBVYIiLS12mZdhERERERkQJTgSUiIiIiIpIn/o13ka4+fOMTfnvGDXw+fxHBUIDDzz2Qc377JR79w5Pc8u27iLZEcQIOR190COde+RXefP4d7vnlQyz+aBnb7rYVp//wOMZsPcpz7HgszmM3Pc0Tt/0HN+FywJe+wDEXH0ooEvLsLyIi4uX+3z7Cfb95hJbGVoaNq+OyW89jh7224bsH/oK5/3sbay3Vwyr5+ePfZbMdxvHI9U/yrzueBWM46Ix9OOrCQwiGvN+VWvzRUu7+xUPMe2E+w8YP4ZTvHcuk/Sb28hmKiPRNegermz59ewHn7Hg5Xb9vIzYfypKPl2f032b6BD558zPamqMA+BwfoUiQ62b8ivHbj0nra63lB4f9mjeff6dj1/tgJMjmE8dyzUu/6HjhWERkoNM7WD3z+6/dyFN/ejajvaQiQnN9S0b7VlO24LO3F9DWksxVoUiQrXfdkiv/+5OMhZYWfrCEC6Z+h9amNtxEcr+tUEmQb9x4Ngd+eZ/8n4yISB+ld7Dy5Kqzb8oorgDP4grg3ZkfdBRXAG7CpbWpldu/d3dG3/mvfMC8F9YXVwDRliifvbOQWU+8nofoRURkoIu2Rnnqz5nFFeBZXAF8MOfjjuIKoK0lyvuzP2bu/97O6Hvnj++jtbG1o7gCaGuOcvNld5KIJ3oYvYhI/6cCq5s+fvPzHo9hLbw9472M9vkz3icRy0xOLY2tzHthfo+PKyIiA9/8mR9ANyenWDfzC6Itbbz9cmaumvfCu7ge/WOtMVYsXNW9A4uIDEAqsLqprKpk451yUOOxd1btyGr8oczX4kKRIEPH1uXluCIiMrCN2HxoXsYJRoLUdtr3ql3NiCrP/omES3lNWV6OLSLSn6nA6qYv/d8J3h9k2Qs4EAoQjKRv7hguDXHyd4/J6Lv7UVMJhAJ03VfY8Tt88ZQ9NyVcEREZZIaOHcKQbt6U65qnAPwBP184YXpG+ynfO5ZQSfrCS8FwgL2Om0ZpRX5uQoqI9GcqsLrpsLMP4IjzDkorqMpryrh9/tUZT5lKyiPc/dkf2Pu46QRCASLlYcKlIU79/rHsd9peGWMHw0Gu+t9PGbPNKIKRIKGSIMPHD+GKZ35ERW15oU9NREQGiBtnX0Hd6PSnT7sfPZVfPvkDjC/9Lt4+J+3OtS/9gpFbDidUEiQUCTJyy+Fc+exPiJRFMsbe69hpnPHzkwmXhYmUhwmEA+x21FQuveXcgp6TiEh/oVUEN1Frcyuv//dtho2tZfMdx3e0L/lkGbOemMPEvbdl84nr2+tXN7Bm6VqGjR9KuGTjS64v+WQZbsJl5BbDM1ZwEhEZ6LSKYH4s+mAJn7+7kIl7bUtZ1frpe7OeeI3lC1az35f3JpLaBsRay5KPl2GMYfhmQzeae6KtUZZ8vIzqYVW6CSgig1K2XKUCS0RE+hwVWCIi0tdpmXYREREREZECU4ElIiIiIiKSJ5lrgg9wMx9/jft/9wirl6xl8gE7cur3j6VuVK1n3zXL1/Lr065l3gvv4vMZdjtyKt++4wJeemQWvzvrRmKtMQC23nVLbpj5ay7e8/u88/IHHV8/dGwdd396E1/f+Zt8/OZnHe0HfHlvLr31XM7a+hKWfprcoNjnGC677Tz2O3UvHrv5Xzx5239wEy77n743x1x8KKGI93tbyz9fwV2/eIi5/3ubulE1nPTto9n1kF3y9e0SEZFe1lTfzN9+/xjPP/AyoZIQR5x3EAeftS8+n/c90ecfnMGNl/6ZtcvqKa8t4+zfnMaBX9mX7x/6S1596g0guRr
t2b89ncPO3Z9jqs4gHl2/5+LZvz2daYfuwtd3+XbHXoxOwOG2t6/m47mf8stTrsGNJzcVrh1Vw18/voEVn6/i7l88xFsvzmfouCGc+v1j2eWLE7OeU+fcO2n/iZz6/eMYMto794qI9HeD6h2sB69+nDt+dB9tzW1AMuGUVpbwx7lXUtdlr4/W5laOG/JVop12tgcorymlYXVT5uCG7m3smKX/lpM2Y8G7i2hrTh43GAmy+cSxXPPSL3AcJ63v8s9X8PVdvkVLQwuJVPILlYQ453df4sjzDupGMCIifctgfQcr2hrl3EnfZukny4m1JW/ihUtC7HX8dL59x4UZ/Z+47d9cfc4fM9orasupX9WwaYFvhPEZImVhWpvacBPtuSfIN248mwO/vE9G/79f+zh/+kF67i2pjHDL3N9n5F4Rkf5k0L+D1drcllZcASTiCZrrm3ngd49m9L/jx/dnFFeAd3EF3SuuNtD/wzmfdBRXANGWKJ+9s5BXn3wjo+/dv/x7WnEF0Nbcxu3fvZtoKjGLiEj/8ex9L7FiwcqO4gqS+eu5B15m4QdLMvrfdOmdnuMUqrgCsK6lub6lo7gCaGuOcvNld5KIJ9L6tja3pRVX0J57W7j/ikcKFqOISDENmgLr8/kLcfyZpxuPJXj93/My2uf8a25vhJWTlsZW3nz+nYz2N559K624amdJLs0rIiL9y+v/mUdrU1tGu+N3mD/j/Yz21qbW3ggrJ7HWGCsWrkprW/DuIs/cm4gleP0/mblXRGQgGDQFVvWwKuLRuOdnQ8ZkzgOv7UNzw0ORIEPH1GW0Z5u/nojFqRpaWeiwREQkz4aNG4I/6GR+YAy1I6szm319Z5/ERMKlvKYsra16WCWxbuReEZGBYNAUWENG17LDXtsSCKav6xEqCXLit47K6H/2b07rrdDS+AN+uu7t6Pgdvnjqnhl9T/rO0YS6bFocCPmZfMBOVKvAEhHpdw49e3/8/vQ85fMZyqtL2Wnf7TP6Tzt0Um+FliYYCab/ORxgr+OmUVpRktZeN6qWHT1zb8gz94qIDASDpsAC+NH9l7HzfhMJhAJEysOUVpZwwXVnsfO+O2T03XzH8Vz0h6/hc9Z/iwIhP79+8geM3W5URv+f/uNb3sf826UZbT7Hx+V/Oi+jfei4Om5+/beM3noUoUiQUEmIYeOG8Jt//YiK2vKM/lMP2pmvX/klIuVhIuVhAqEAUw7ame/e9Y0Nfh9ERKRvGjZuCD999DvUjKgiXBoiGA6w+U7j+f3/fpqx0BHA//39m2w1dYu0tnHbj+Gm13+b8XSremglh593YMYYkYoIkw7YMaN98kE7MWabkRnt3/jD1zjz5ycTLgsTKY8QCAXY7YgpXHrLuZ7n9KMHLmOX/dfn3pKKCBdce+YGVx0UEenPBtUqgu3WLFvLupUNjJownEAwsMG+rusy55k3CZWGmLjnth3ta1eu5eFrnmCLncez9/G7d7S/9Ogr3HfFo+xz8h4c943DOtqf+ev/ePnR2Rx/+RFsv9vWHe0P/+FJPnvzM8785SlU1q1/6rTkk2Uk4i6jthyO6fpIq4toW4zFHy6lckiFnlyJyIAwWFcRbOe6LgvfX0K4JMjQsUM22n/l4tW8+8oHbDV587T+s56aw7zn3+Wwc/Zn+PihHe1XfvUPLPxwCd+//3KGDk9OPYxGo9xw4e34fA7nX3cGwWDyKdW6dU3c8b27GbP1SI69+PCOMdpa2ljy8XKqh1VSWVex0Ri7k3tFRPqDbLlqUBZYIiLStw32AktERPq+Qb9Mu4iIiIiISKGpwBIREREREckT/8a7DHyu6/LUn57lsZuepq0lyj4n7c7xlx1BSXmkW+N8Nn8hd//iQd579SNGTxjBaT88ju1225qfn/h7nn9oJtjkAhdf+cmJnPqD4wp0NiIiMhCtWLiKe371EK//5y1qR1Zz4reO6vYqgtZa/nvPizx8/RM0rWtmj6OncuK3jqKtuY3zp36Xtcv
WAVA1rJKbXv8ddcMzl4YXEZEN0ztYwBVfuZ4XHnqlY6f5YDjAiC2Gc+PsKwiGcnsR96O5n3LJnj8i2tKG6ya/p6FIkJqR1Sz5aFlG/1O/fwxn/uLUvJ2DiMhAonew0q1ctIpzdvomzfUtJOIJILnU+Vd/fSrHXHRozuPcdOmfeeK2/3RsZhwI+akcUsHKhas9+z8ZvTdj2XgREUnSO1hZLHhvEc8/OLOjuAKItsZY9ulynv/bjJzHueXbf6W1qbWjuAJoa4l6FlcA9/76kU2OWUREBpd7f/MIzQ3riyuAtuY2/vSDe2lradvAV663cvFqHrv5mY7iCiDWFmfVYu/iCuBnJ1y16UGLiAxSg77Aevvl9/H5MpdBb21qY85/3sx5nPkzP+jWcfvjk0MRESmON/47j0QskdHu8xkWvLc4pzHef/UjAqHMp1HWzf41855/J+cYRUQkadAXWLUjq/H5Mr8NgaCfYeM2vvdIu6ohmRsBi4iI5EPd6FrP9ng0TvWwqpzGqBlRlTbLIheVdcptIiLdNegLrEn7T6SkMpKx470TcDjkrC/mPM5J3zmacEkorS0UCWKyfIdHbz2y27GKiMjgdNK3jiLUJccEgn52+sL21I7IbSGKraduydAxtfic9MTkDzpZv+b7917a/WBFRAa5QV9gOY7DVf/7GZtNHEswHCBcGqJmRBU/e/Q7DB2b+xOsQ7+2P8ddfjihSJCS8giBUIAvnLg7t79zbUYyK6su5dZ5v8/3qYiIyAA1af8dOe/qr1BSHiFSHiYQCrDL/hP5wX2X5DyGMYYrnvkxW0/dIpnvysJU1JXz4799kyPOOzCj/1EXHMRWkzbP41mIiAwOWkWwk6WfLifaGmP0ViM8pw3moqWxhaWfrqB2ZDUVNeunVrz2zFzm/OdNDvjSFxi//dh8hSwiMiBpFUFv0bYYiz5YQtWQipynBnpZvmAlLQ0tjN56JI6TfIIVj8f52+8fw+czHHfp4Vo9UERkI7LlKhVYIiLS56jAEhGRvk7LtIuIiIiIiBSYCiwREREREZE8KXiBZYw52BjznjHmQ2PMdz0+N8aY61Kfv2mMmVTomERERNopT4mISD4VtMAyxjjAH4BDgO2AU4wx23XpdggwIfXrHOCmQsYkIiLSTnlKRETyrdBPsHYFPrTWfmytjQL3AUd16XMU8BebNBOoMsaMKHBcIiIioDwlIiJ5VugCaxSwoNOfF6bauttHRESkEJSnREQkrwpdYBmPtq7rwufSB2PMOcaY2caY2StWrMhLcCIiMujlLU+BcpWIiBS+wFoIjOn059HA4k3og7X2FmvtFGvtlCFDhuQ9UBERGZTylqdAuUpERAq80bAxxg+8D+wHLAJeBU611r7dqc9hwIXAocA04Dpr7a4bGXcF8FkPw6sDVvZwjP5isJyrznNg0XkOPN0513HW2oJXKIXKU6mvU67Knc5z4Bks56rzHFi6e56eucqfv3gyWWvjxpgLgacBB/iTtfZtY8y5qc9vBp4gmbQ+BJqBM3MYt8dJ1xgz22vn5YFosJyrznNg0XkOPH3xXAuVp1Jfq1yVI53nwDNYzlXnObDk6zwLWmABWGufIJmcOrfd3On3Frig0HGIiIh4UZ4SEZF8KvhGwyIiIiIiIoPFYC6wbil2AL1osJyrznNg0XkOPIPpXPNlsHzPdJ4Dz2A5V53nwJKX8yzoIhciIiIiIiKDyWB+giUiIiIiIpJXKrBERERERETyZNAVWMaYPxljlhtj3ip2LIVkjBljjHnWGDPfGPO2MebiYsdUCMaYsDFmljFmbuo8f1rsmArJGOMYY143xjxe7FgKyRjzqTFmnjHmDWPM7GLHUyjGmCpjzIPGmHdT/1Z3K3ZM+WaM2Tr199j+q94Yc0mx4+rrlKsGFuWqgWew5ClQrtqk8QbbO1jGmL2BRuAv1todih1PoRhjRgAjrLVzjDHlwGvA0dbad4ocWl4ZYwxQaq1tNMYEgBeBi621M4s
cWkEYYy4DpgAV1trDix1PoRhjPgWmWGsH9KaGxpg7gRestbcZY4JAibV2bZHDKhhjjENyM99p1tqebsA7oClXKVf1Z4MhVw2WPAXKVZsyxqB7gmWtfR5YXew4Cs1au8RaOyf1+wZgPjCquFHln01qTP0xkPo1IO8aGGNGA4cBtxU7Fuk5Y0wFsDdwO4C1NjqQE1bKfsBHKq42TrlqYFGukv5KuWrTDLoCazAyxowHdgFeKXIoBZGaivAGsBx4xlo7IM8TuAb4NuAWOY7eYIF/GWNeM8acU+xgCmRzYAXw59RUmtuMMaXFDqrATgbuLXYQ0jcpVw0Y1zA4ctVgyFOgXLVJVGANcMaYMuAh4BJrbX2x4ykEa23CWrszMBrY1Rgz4KbTGGMOB5Zba18rdiy9ZA9r7STgEOCC1HSpgcYPTAJustbuAjQB3y1uSIWTmlZyJPC3YscifY9y1cAwyHLVYMhToFy1SVRgDWCped4PAXdba/9e7Hj+v727C9l7juM4/v4wbEONLJHWPISQsBHmwOOipEhqIXMgRUrynDwcKFJOyGNEeYplirS1EkVbHtaYtZ0QbXmWsgNN9HVw/ZarO9vtWtfVf/d1v18n1//+/Z++98Hd5/7+/7///xq1dsv6feDCbisZiUXAJW3O92vAuUle6rak0amq79rnT8By4LRuKxqJLcCWvqvYy+iF2Li6CFhbVT92XYh2L2bVWJk2WTVNcgrMql1igzWm2gO1zwEbq+rRrusZlSRzk8xpy7OA84FNnRY1AlV1V1UdVlXz6d26fq+qruq4rJFIsm972J02DWExMHZvUquqH4DNSY5pQ+cBY/Vg/wRLcHqgJjCrxst0yarpklNgVu2qGUMoZEpJ8ipwNnBQki3AfVX1XLdVjcQi4GpgfZvzDXB3Vb3bXUkjcQjwYnvjyx7A61U1tq+FnSYOBpb3/u9iBvBKVa3otqSRuQl4uU1J+Bq4tuN6RiLJbOAC4Pqua5kqzCqzSru16ZRTYFYNfqzp9pp2SZIkSRoVpwhKkiRJ0pDYYEmSJEnSkNhgSZIkSdKQ2GBJkiRJ0pDYYEmSJEnSkNhgSZIkSdKQ2GBJI5RkaZJD/8d2LyS5fCfr30+ycMi1zUlyQ9/PZyfxe1kkaRoxp6Ths8GSRmspMGlwdWQOcMNkG0mSxtpSzClpqGywpAEkmZ9kU5IXk3yRZFmS2UkWJPkgyWdJViY5pF3pW0jv28/XJZmV5N4knyT5MskzaV8DP2ANi5OsTrI2yRtJ9mvj3yR5oI2vT3JsG5+bZFUbfzrJt0kOAh4Cjmy1PdIOv1/7nTYleXlX6pMkdceckrpngyUN7hjgmao6EfgduBF4DLi8qhYAzwMPVtUy4FPgyqo6qar+AB6vqlOr6gRgFnDxICdugXMPcH5VndKOf0vfJr+08SeBW9vYfcB7bXw5MK+N3wl81Wq7rY2dDNwMHAccASwapD5J0m7BnJI6NKPrAqQpaHNVfdSWXwLuBk4AVrULaXsC3+9g33OS3A7MBg4ENgBvD3Du0+mFykftXHsDq/vWv9k+PwMua8tnAZcCVNWKJL/t5PgfV9UWgCTrgPnAhwPUJ0nqnjkldcgGSxpcTfh5K7Chqs7Y2U5JZgJPAAuranOS+4GZA547wKqqWrKD9dva59/8+/c9yPSJbX3L/ceQJE0d5pTUIacISoObl2R7SC0B1gBzt48l2SvJ8W39VmD/trw9pH5p89F3+DamnVgDLEpyVDvX7CRHT7LPh8AVbfvFwAH/UZskaXyYU1KHbLCkwW0ErknyBb3pE4/RC6GHk3wOrAPObNu+ADzVpjFsA54F1gNvAZ8MeuKq+pneG59ebedfAxw7yW4PAIuTrAUuojctZGtV/UpvCseXfQ8PS5KmPnNK6lCqJt5FlrQjSeYD77SHf6eEJPsAf1fVX+3q5ZNVdVLHZUmSRsCckrrnvFVp/M0DXk+yB/AncF3H9UiS1M+c0ljxDpa0G0myHDh
8wvAdVbWyi3okSepnTkmTs8GSJEmSpCHxJReSJEmSNCQ2WJIkSZI0JDZYkiRJkjQkNliSJEmSNCT/ANuTvVl5/HO8AAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "fig = plt.figure(figsize=(12, 5))\n", "\n", "plt.subplot(121)\n", "df_trans.scatter(df_trans.petal_length, df_trans.petal_width, c_expr=df_trans.class_)\n", "plt.title('Original classes')\n", "\n", "plt.subplot(122)\n", "df_trans.scatter(df_trans.petal_length, df_trans.petal_width, c_expr=df_trans.predicted_kmean_map)\n", "plt.title('Predicted classes')\n", "\n", "plt.tight_layout()\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "As with any algorithm implemented in `vaex.ml`, K-Means can be used on billions of samples. Fitting takes **under 2 minutes** when applied on the oversampled Iris dataset, numbering over **1 billion** samples." ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "ExecuteTime": { "end_time": "2020-07-14T15:58:58.284463Z", "start_time": "2020-07-14T15:58:58.280028Z" }, "tags": [ "skip-ci" ] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of samples in DataFrame: 1,005,000,000\n" ] } ], "source": [ "df = vaex.datasets.iris_1e9()\n", "n_samples = len(df)\n", "print(f'Number of samples in DataFrame: {n_samples:,}')" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "ExecuteTime": { "end_time": "2020-07-14T15:59:20.061389Z", "start_time": "2020-07-14T15:58:58.855735Z" }, "tags": [ "skip-ci" ] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Iteration 0, inertia 838974000.0037192\n", "Iteration 1, inertia 535903134.000306\n", "Iteration 2, inertia 530190921.4848897\n", "Iteration 3, inertia 528931941.03372437\n", "Iteration 4, inertia 528931941.0337243\n", "CPU times: user 2min 37s, sys: 1.26 s, total: 2min 39s\n", "Wall time: 19.9 s\n" ] } ], "source": [ "%%time\n", "\n", "features = ['petal_length', 'petal_width', 'sepal_length', 'sepal_width']\n", "kmeans = vaex.ml.cluster.KMeans(features=features, n_clusters=3, max_iter=100, 
verbose=True, random_state=31)\n", "kmeans.fit(df)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Supervised learning\n", "\n", "While `vaex.ml` does not yet implement any supervised machine learning models, it does provide wrappers to several popular libraries such as [scikit-learn](https://scikit-learn.org/), [XGBoost](https://xgboost.readthedocs.io/), [LightGBM](https://lightgbm.readthedocs.io/) and [CatBoost](https://catboost.ai/). \n", "\n", "The main benefit of these wrappers is that they turn the models into `vaex.ml` transformers. This means the models become part of the DataFrame _state_ and thus can be serialized, and their predictions can be returned as _virtual columns_. This is especially useful for creating various diagnostic plots and evaluating performance metrics at no memory cost, as well as building ensembles. \n", "\n", "### `Scikit-Learn` example\n", "\n", "The `vaex.ml.sklearn` module provides convenient wrappers to the `scikit-learn` estimators. In fact, these wrappers can be used with any library that follows the API convention established by `scikit-learn`, i.e. implements the `.fit` and `.predict` methods.\n", "\n", "Here is an example:" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "ExecuteTime": { "end_time": "2020-07-14T15:59:30.707188Z", "start_time": "2020-07-14T15:59:30.385719Z" } }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
# sepal_length sepal_width petal_length petal_width class_ prediction
0 5.9 3.0 4.2 1.5 1 1
1 6.1 3.0 4.6 1.4 1 1
2 6.6 2.9 4.6 1.3 1 1
3 6.7 3.3 5.7 2.1 2 2
4 5.5 4.2 1.4 0.2 0 0
... ... ... ... ... ... ...
1455.2 3.4 1.4 0.2 0 0
1465.1 3.8 1.6 0.2 0 0
1475.8 2.6 4.0 1.2 1 1
1485.7 3.8 1.7 0.3 0 0
1496.2 2.9 4.3 1.3 1 1
" ], "text/plain": [ "# sepal_length sepal_width petal_length petal_width class_ prediction\n", "0 5.9 3.0 4.2 1.5 1 1\n", "1 6.1 3.0 4.6 1.4 1 1\n", "2 6.6 2.9 4.6 1.3 1 1\n", "3 6.7 3.3 5.7 2.1 2 2\n", "4 5.5 4.2 1.4 0.2 0 0\n", "... ... ... ... ... ... ...\n", "145 5.2 3.4 1.4 0.2 0 0\n", "146 5.1 3.8 1.6 0.2 0 0\n", "147 5.8 2.6 4.0 1.2 1 1\n", "148 5.7 3.8 1.7 0.3 0 0\n", "149 6.2 2.9 4.3 1.3 1 1" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from vaex.ml.sklearn import Predictor\n", "from sklearn.ensemble import GradientBoostingClassifier\n", "\n", "df = vaex.datasets.iris()\n", "\n", "features = ['petal_length', 'petal_width', 'sepal_length', 'sepal_width']\n", "target = 'class_'\n", "\n", "model = GradientBoostingClassifier(random_state=42)\n", "vaex_model = Predictor(features=features, target=target, model=model, prediction_name='prediction')\n", "\n", "vaex_model.fit(df=df)\n", "\n", "df = vaex_model.transform(df)\n", "df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "One can still train a predictive model on datasets that are too big to fit into memory by leveraging the on-line learners provided by `scikit-learn`. The `vaex.ml.sklearn.IncrementalPredictor` conveniently wraps these learners and provides control on how the data is passed to them from a `vaex` DataFrame. \n", "\n", "Let us train a model on the oversampled Iris dataset which comprises over 1 billion samples." 
] }, { "cell_type": "code", "execution_count": 21, "metadata": { "ExecuteTime": { "end_time": "2020-07-14T16:08:08.898670Z", "start_time": "2020-07-14T15:59:33.194286Z" }, "tags": [ "skip-ci" ] }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "88b195fa8e9b4086a999e5da0b53a6a6", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, max=1.0), Label(value='In progress...')))" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
# sepal_length sepal_width petal_length petal_width class_ prediction
0 5.9 3.0 4.2 1.5 1 1
1 6.1 3.0 4.6 1.4 1 1
2 6.6 2.9 4.6 1.3 1 1
3 6.7 3.3 5.7 2.1 2 2
4 5.5 4.2 1.4 0.2 0 0
... ... ... ... ... ... ...
1,004,999,9955.2 3.4 1.4 0.2 0 0
1,004,999,9965.1 3.8 1.6 0.2 0 0
1,004,999,9975.8 2.6 4.0 1.2 1 1
1,004,999,9985.7 3.8 1.7 0.3 0 0
1,004,999,9996.2 2.9 4.3 1.3 1 1
" ], "text/plain": [ "# sepal_length sepal_width petal_length petal_width class_ prediction\n", "0 5.9 3.0 4.2 1.5 1 1\n", "1 6.1 3.0 4.6 1.4 1 1\n", "2 6.6 2.9 4.6 1.3 1 1\n", "3 6.7 3.3 5.7 2.1 2 2\n", "4 5.5 4.2 1.4 0.2 0 0\n", "... ... ... ... ... ... ...\n", "1,004,999,995 5.2 3.4 1.4 0.2 0 0\n", "1,004,999,996 5.1 3.8 1.6 0.2 0 0\n", "1,004,999,997 5.8 2.6 4.0 1.2 1 1\n", "1,004,999,998 5.7 3.8 1.7 0.3 0 0\n", "1,004,999,999 6.2 2.9 4.3 1.3 1 1" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from vaex.ml.sklearn import IncrementalPredictor\n", "from sklearn.linear_model import SGDClassifier\n", "\n", "df = vaex.datasets.iris_1e9()\n", "\n", "features = ['petal_length', 'petal_width', 'sepal_length', 'sepal_width']\n", "target = 'class_'\n", "\n", "model = SGDClassifier(learning_rate='constant', eta0=0.0001, random_state=42)\n", "vaex_model = IncrementalPredictor(features=features, target=target, model=model, \n", " batch_size=500_000, partial_fit_kwargs={'classes':[0, 1, 2]})\n", "\n", "vaex_model.fit(df=df, progress='widget')\n", "\n", "df = vaex_model.transform(df)\n", "df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### `XGBoost` example\n", "\n", "Libraries such as `XGBoost` provide more options, such as validation during training and early stopping. We provide wrappers that keep close to the native API of these libraries, in addition to the `scikit-learn` API. \n", "\n", "While the following example showcases the `XGBoost` wrapper, `vaex.ml` implements similar wrappers for `LightGBM` and `CatBoost`." 
] }, { "cell_type": "code", "execution_count": 22, "metadata": { "ExecuteTime": { "end_time": "2020-07-14T16:08:44.463784Z", "start_time": "2020-07-14T16:08:43.893355Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[13:41:31] WARNING: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/learner.cc:1061: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'multi:softmax' was changed from 'merror' to 'mlogloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n" ] }, { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
# sepal_length sepal_width petal_length petal_width class_ xgboost_prediction
0 5.9 3.0 4.2 1.5 1 1.0
1 6.1 3.0 4.6 1.4 1 1.0
2 6.6 2.9 4.6 1.3 1 1.0
3 6.7 3.3 5.7 2.1 2 2.0
4 5.5 4.2 1.4 0.2 0 0.0
... ... ... ... ... ... ...
80,3955.2 3.4 1.4 0.2 0 0.0
80,3965.1 3.8 1.6 0.2 0 0.0
80,3975.8 2.6 4.0 1.2 1 1.0
80,3985.7 3.8 1.7 0.3 0 0.0
80,3996.2 2.9 4.3 1.3 1 1.0
" ], "text/plain": [ "# sepal_length sepal_width petal_length petal_width class_ xgboost_prediction\n", "0 5.9 3.0 4.2 1.5 1 1.0\n", "1 6.1 3.0 4.6 1.4 1 1.0\n", "2 6.6 2.9 4.6 1.3 1 1.0\n", "3 6.7 3.3 5.7 2.1 2 2.0\n", "4 5.5 4.2 1.4 0.2 0 0.0\n", "... ... ... ... ... ... ...\n", "80,395 5.2 3.4 1.4 0.2 0 0.0\n", "80,396 5.1 3.8 1.6 0.2 0 0.0\n", "80,397 5.8 2.6 4.0 1.2 1 1.0\n", "80,398 5.7 3.8 1.7 0.3 0 0.0\n", "80,399 6.2 2.9 4.3 1.3 1 1.0" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from vaex.ml.xgboost import XGBoostModel\n", "\n", "df = vaex.datasets.iris_1e5()\n", "df_train, df_test = df.ml.train_test_split(test_size=0.2, verbose=False)\n", "\n", "features = ['petal_length', 'petal_width', 'sepal_length', 'sepal_width']\n", "target = 'class_'\n", "\n", "params = {'learning_rate': 0.1,\n", " 'max_depth': 3, \n", " 'num_class': 3, \n", " 'objective': 'multi:softmax',\n", " 'subsample': 1,\n", " 'random_state': 42,\n", " 'n_jobs': -1}\n", "\n", "\n", "booster = XGBoostModel(features=features, target=target, num_boost_round=500, params=params)\n", "booster.fit(df=df_train, evals=[(df_train, 'train'), (df_test, 'test')], early_stopping_rounds=5)\n", "\n", "df_test = booster.transform(df_test)\n", "df_test" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### `CatBoost` example\n", "\n", "The CatBoost library supports summing up models. With this feature, we can use CatBoost to train a model using data that is otherwise too large to fit in memory. The idea is to train a single CatBoost model per chunk of data, and then sum up the individual models to create a master model. To use this feature via `vaex.ml` just specify the `batch_size` argument in the `CatBoostModel` wrapper. One can also specify additional options such as the strategy on how to sum up the individual models, or how they should be weighted." 
] }, { "cell_type": "code", "execution_count": 23, "metadata": { "ExecuteTime": { "end_time": "2020-07-14T16:09:54.623370Z", "start_time": "2020-07-14T16:08:46.494467Z" }, "tags": [ "skip-ci" ] }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "bce3f89da0d24245969e3416310865f2", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, max=1.0), Label(value='In progress...')))" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
# sepal_length sepal_width petal_length petal_width class_ catboost_prediction
0 5.9 3.0 4.2 1.5 1 array([1])
1 6.1 3.0 4.6 1.4 1 array([1])
2 6.6 2.9 4.6 1.3 1 array([1])
3 6.7 3.3 5.7 2.1 2 array([2])
4 5.5 4.2 1.4 0.2 0 array([0])
... ... ... ... ... ... ...
80,399,9955.2 3.4 1.4 0.2 0 array([0])
80,399,9965.1 3.8 1.6 0.2 0 array([0])
80,399,9975.8 2.6 4.0 1.2 1 array([1])
80,399,9985.7 3.8 1.7 0.3 0 array([0])
80,399,9996.2 2.9 4.3 1.3 1 array([1])
" ], "text/plain": [ "# sepal_length sepal_width petal_length petal_width class_ catboost_prediction\n", "0 5.9 3.0 4.2 1.5 1 array([1])\n", "1 6.1 3.0 4.6 1.4 1 array([1])\n", "2 6.6 2.9 4.6 1.3 1 array([1])\n", "3 6.7 3.3 5.7 2.1 2 array([2])\n", "4 5.5 4.2 1.4 0.2 0 array([0])\n", "... ... ... ... ... ... ...\n", "80,399,995 5.2 3.4 1.4 0.2 0 array([0])\n", "80,399,996 5.1 3.8 1.6 0.2 0 array([0])\n", "80,399,997 5.8 2.6 4.0 1.2 1 array([1])\n", "80,399,998 5.7 3.8 1.7 0.3 0 array([0])\n", "80,399,999 6.2 2.9 4.3 1.3 1 array([1])" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from vaex.ml.catboost import CatBoostModel\n", "\n", "df = vaex.datasets.iris_1e8()\n", "df_train, df_test = df.ml.train_test_split(test_size=0.2, verbose=False)\n", "\n", "features = ['petal_length', 'petal_width', 'sepal_length', 'sepal_width']\n", "target = 'class_'\n", "\n", "params = {\n", " 'leaf_estimation_method': 'Gradient',\n", " 'learning_rate': 0.1,\n", " 'max_depth': 3,\n", " 'bootstrap_type': 'Bernoulli',\n", " 'subsample': 0.8,\n", " 'sampling_frequency': 'PerTree',\n", " 'colsample_bylevel': 0.8,\n", " 'reg_lambda': 1,\n", " 'objective': 'MultiClass',\n", " 'eval_metric': 'MultiClass',\n", " 'random_state': 42,\n", " 'verbose': 0,\n", "}\n", "\n", "booster = CatBoostModel(features=features, target=target, num_boost_round=23, \n", " params=params, prediction_type='Class', batch_size=11_000_000)\n", "booster.fit(df=df_train, progress='widget')\n", "\n", "df_test = booster.transform(df_train)\n", "df_test" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### `Keras` example\n", "\n", "`Keras` is the most popular high-level API to building neural network models with tensorflow as its backend. Neural networks can have very diverse and complicated architectures, and their training loops can be both simple and sophisticated. 
This is why, at least for now, we leave the users to train their `keras` models as they normally would, and `vaex-ml` provides a simple wrapper for serialization and lazy evaluation of those models. In addition, `vaex-ml` also provides a convenience method to turn a DataFrame into a generator, suitable for training of `Keras` models. See the example below." ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2021-08-14 23:47:55.800260: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", "2021-08-14 23:47:55.800282: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Recommended \"steps_per_epoch\" arg: 516.0\n", "Recommended \"steps_per_epoch\" arg: 65.0\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2021-08-14 23:47:57.111408: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "2021-08-14 23:47:57.111910: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", "2021-08-14 23:47:57.111974: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory\n", "2021-08-14 23:47:57.112032: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublasLt.so.11'; dlerror: libcublasLt.so.11: cannot open shared object 
file: No such file or directory\n", "2021-08-14 23:47:57.112093: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcufft.so.10'; dlerror: libcufft.so.10: cannot open shared object file: No such file or directory\n", "2021-08-14 23:47:57.112150: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcurand.so.10'; dlerror: libcurand.so.10: cannot open shared object file: No such file or directory\n", "2021-08-14 23:47:57.112206: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcusolver.so.11'; dlerror: libcusolver.so.11: cannot open shared object file: No such file or directory\n", "2021-08-14 23:47:57.112261: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcusparse.so.11'; dlerror: libcusparse.so.11: cannot open shared object file: No such file or directory\n", "2021-08-14 23:47:57.112317: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudnn.so.8'; dlerror: libcudnn.so.8: cannot open shared object file: No such file or directory\n", "2021-08-14 23:47:57.112327: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1835] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. 
Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.\n", "Skipping registering GPU devices...\n", "2021-08-14 23:47:57.112682: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/11\n", " 11/516 [..............................] - ETA: 2s - loss: 1.7922 " ] }, { "name": "stderr", "output_type": "stream", "text": [ "2021-08-14 23:47:57.326751: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "516/516 [==============================] - 3s 6ms/step - loss: 0.2172 - val_loss: 0.1724\n", "Epoch 2/11\n", "516/516 [==============================] - 3s 6ms/step - loss: 0.1736 - val_loss: 0.1715\n", "Epoch 3/11\n", "516/516 [==============================] - 3s 6ms/step - loss: 0.1729 - val_loss: 0.1705\n", "Epoch 4/11\n", "516/516 [==============================] - 3s 6ms/step - loss: 0.1725 - val_loss: 0.1707\n", "Epoch 5/11\n", "516/516 [==============================] - 3s 6ms/step - loss: 0.1722 - val_loss: 0.1708\n", "Epoch 6/11\n", "516/516 [==============================] - 3s 6ms/step - loss: 0.1720 - val_loss: 0.1701\n", "Epoch 7/11\n", "516/516 [==============================] - 3s 6ms/step - loss: 0.1718 - val_loss: 0.1697\n", "Epoch 8/11\n", "516/516 [==============================] - 3s 6ms/step - loss: 0.1717 - val_loss: 0.1706\n", "Epoch 9/11\n", "516/516 [==============================] - 3s 6ms/step - loss: 0.1715 - val_loss: 0.1698\n", "Epoch 10/11\n", "516/516 [==============================] - 3s 6ms/step - 
loss: 0.1714 - val_loss: 0.1702\n", "Epoch 11/11\n", "516/516 [==============================] - 3s 6ms/step - loss: 0.1713 - val_loss: 0.1701\n", "INFO:tensorflow:Assets written to: /tmp/tmp14gsptzz/assets\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2021-08-14 23:48:31.519641: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.\n" ] }, { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
# id x y z vx vy vz E L Lz FeH minmax_scaled_x minmax_scaled_y minmax_scaled_z minmax_scaled_vx minmax_scaled_vy minmax_scaled_vzkeras_pred
0 23 0.137403-5.07974 1.40165 111.828 62.8776 -88.121 -134786 700.236 576.698-1.7935 0.375163 0.72055 0.397008 0.570648 0.56065 0.414253array([-1.6143968], dtype=float32)
1 31-1.95543 -0.840676 1.26239 -259.282 20.8279-148.457 -134990 676.813-258.7 -0.623007 0.365132 0.738746 0.395427 0.266912 0.5249 0.357964array([-1.509573], dtype=float32)
2 22 2.33077 -0.570014 0.761285 -53.4566-43.377 -71.3196-177062 196.209-131.573-0.889463 0.385676 0.739908 0.389737 0.43537 0.470313 0.429927array([-1.5752358], dtype=float32)
3 26 0.777881-2.83258 0.0797214 256.427 202.451 -12.76 -125176 884.581 883.833-1.65996 0.378233 0.730196 0.381998 0.688994 0.679314 0.484558array([-1.6558373], dtype=float32)
4 1 3.37429 2.62885 -0.797169 300.697 153.772 83.9173 -97150.4681.868-271.616-1.6496 0.390678 0.753639 0.372041 0.725228 0.637928 0.574749array([-1.6719546], dtype=float32)
" ], "text/plain": [ " # id x y z vx vy vz E L Lz FeH minmax_scaled_x minmax_scaled_y minmax_scaled_z minmax_scaled_vx minmax_scaled_vy minmax_scaled_vz keras_pred\n", " 0 23 0.137403 -5.07974 1.40165 111.828 62.8776 -88.121 -134786 700.236 576.698 -1.7935 0.375163 0.72055 0.397008 0.570648 0.56065 0.414253 array([-1.6143968], dtype=float32)\n", " 1 31 -1.95543 -0.840676 1.26239 -259.282 20.8279 -148.457 -134990 676.813 -258.7 -0.623007 0.365132 0.738746 0.395427 0.266912 0.5249 0.357964 array([-1.509573], dtype=float32)\n", " 2 22 2.33077 -0.570014 0.761285 -53.4566 -43.377 -71.3196 -177062 196.209 -131.573 -0.889463 0.385676 0.739908 0.389737 0.43537 0.470313 0.429927 array([-1.5752358], dtype=float32)\n", " 3 26 0.777881 -2.83258 0.0797214 256.427 202.451 -12.76 -125176 884.581 883.833 -1.65996 0.378233 0.730196 0.381998 0.688994 0.679314 0.484558 array([-1.6558373], dtype=float32)\n", " 4 1 3.37429 2.62885 -0.797169 300.697 153.772 83.9173 -97150.4 681.868 -271.616 -1.6496 0.390678 0.753639 0.372041 0.725228 0.637928 0.574749 array([-1.6719546], dtype=float32)" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import vaex.ml.tensorflow\n", "import tensorflow.keras as K\n", "\n", "df = vaex.example()\n", "df_train, df_valid, df_test = df.split_random([0.8, 0.1, 0.1], random_state=42)\n", "\n", "features = ['x', 'y', 'z', 'vx', 'vy', 'vz']\n", "target = 'FeH'\n", "\n", "# Scaling the features\n", "df_train = df_train.ml.minmax_scaler(features=features)\n", "features = df_train.get_column_names(regex='^minmax_')\n", "\n", "# Apply preprocessing to the validation\n", "state_prep = df_train.state_get()\n", "df_valid.state_set(state_prep)\n", "\n", "# Generators for the train and validation sets\n", "gen_train = df_train.ml.tensorflow.to_keras_generator(features=features, target=target, batch_size=512)\n", "gen_valid = df_valid.ml.tensorflow.to_keras_generator(features=features, target=target, batch_size=512)\n", "\n", "# 
Create and fit a simple Sequential Keras model\n", "nn_model = K.Sequential()\n", "nn_model.add(K.layers.Dense(3, activation='tanh'))\n", "nn_model.add(K.layers.Dense(1, activation='linear'))\n", "nn_model.compile(optimizer='sgd', loss='mse')\n", "nn_model.fit(x=gen_train, validation_data=gen_valid, epochs=11, steps_per_epoch=516, validation_steps=65)\n", "\n", "# Serialize the model\n", "keras_model = vaex.ml.tensorflow.KerasModel(features=features, prediction_name='keras_pred', model=nn_model)\n", "df_train = keras_model.transform(df_train)\n", "\n", "# Apply all the transformations to the test set\n", "state = df_train.state_get()\n", "df_test.state_set(state)\n", "\n", "# Preview the results\n", "df_test.head(5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### `River` example\n", "\n", "`River` is an up-and-coming library for online learning, and provides a variety of models that can learn incrementally. While most of the `river` models currently support per-sample training, a few do support mini-batch training, which is extremely fast - a great synergy to do machine learning with vaex." ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "ExecuteTime": { "end_time": "2021-04-13T11:12:20.713420Z", "start_time": "2021-04-13T11:12:20.695920Z" }, "tags": [ "skip-ci" ] }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "385a30c0435042b0a69ec5e8ef3c3a48", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, max=1.0), Label(value='In progress...')))" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
# sepal_length sepal_width petal_length petal_width class_ prediction_raw
0 5.9 3.0 4.2 1.5 1 1.2262451850482554
1 6.1 3.0 4.6 1.4 1 1.3372106202149072
2 6.6 2.9 4.6 1.3 1 1.3080263625894342
3 6.7 3.3 5.7 2.1 2 1.8246442870772779
4 5.5 4.2 1.4 0.2 0 -0.1719159051653813
... ... ... ... ... ... ...
200,999,9955.2 3.4 1.4 0.2 0 -0.06961837848289065
200,999,9965.1 3.8 1.6 0.2 0 -0.04133966888449841
200,999,9975.8 2.6 4.0 1.2 1 1.1380612859534056
200,999,9985.7 3.8 1.7 0.3 0 -0.005633275295105093
200,999,9996.2 2.9 4.3 1.3 1 1.2171097577656713
" ], "text/plain": [ "# sepal_length sepal_width petal_length petal_width class_ prediction_raw\n", "0 5.9 3.0 4.2 1.5 1 1.2262451850482554\n", "1 6.1 3.0 4.6 1.4 1 1.3372106202149072\n", "2 6.6 2.9 4.6 1.3 1 1.3080263625894342\n", "3 6.7 3.3 5.7 2.1 2 1.8246442870772779\n", "4 5.5 4.2 1.4 0.2 0 -0.1719159051653813\n", "... ... ... ... ... ... ...\n", "200,999,995 5.2 3.4 1.4 0.2 0 -0.06961837848289065\n", "200,999,996 5.1 3.8 1.6 0.2 0 -0.04133966888449841\n", "200,999,997 5.8 2.6 4.0 1.2 1 1.1380612859534056\n", "200,999,998 5.7 3.8 1.7 0.3 0 -0.005633275295105093\n", "200,999,999 6.2 2.9 4.3 1.3 1 1.2171097577656713" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from vaex.ml.incubator.river import RiverModel\n", "from river.linear_model import LinearRegression\n", "from river import optim\n", "\n", "\n", "df = vaex.datasets.iris_1e9()\n", "df_train, df_test = df.ml.train_test_split(test_size=0.2, verbose=False)\n", "\n", "features = ['petal_length', 'petal_width', 'sepal_length', 'sepal_width']\n", "target = 'class_'\n", "\n", "river_model = RiverModel(features=features,\n", " target=target,\n", " model=LinearRegression(optimizer=optim.SGD(0.001), intercept_lr=0.001),\n", " prediction_name='prediction_raw',\n", " batch_size=500_000)\n", "river_model.fit(df_train, progress='widget')\n", "river_model.transform(df_test)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Metrics\n", "\n", "`vaex-ml` also provides several of the most common evaluation metrics for classification and regression tasks. These metrics are implemented in `vaex-ml` and thus are evaluated out-of-core, so you do not need to materialize the target and predicted columns. 
\n", "\n", "Here is a list of the currently supported metrics:\n", "\n", "- Classification (binary, and macro-average for multiclass problems):\n", " - Accuracy\n", " - Precision\n", " - Recall\n", " - F1-score\n", " - Confusion matrix\n", " - Classification report (a convenience method, which prints out the accuracy, precision, recall, and F1-score at the same time)\n", " - Matthews Correlation Coeficient\n", "- Regression\n", " - Mean Absolute Error\n", " - Mean Squared Error\n", " - R2 Correlation Score\n", "\n", "Here is a simple example:" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", " Classification report:\n", "\n", " Accuracy: 0.933\n", " Precision: 0.928\n", " Recall: 0.928\n", " F1: 0.928\n", " \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/jovan/vaex/packages/vaex-core/vaex/dataframe.py:5516: UserWarning: It seems your column class_ is already ordinal encoded (values between 0 and 2), automatically switching to use df.categorize\n", " warnings.warn(f'It seems your column {column} is already ordinal encoded (values between {min_value} and {max_value}), automatically switching to use df.categorize')\n", "/home/jovan/vaex/packages/vaex-core/vaex/dataframe.py:5516: UserWarning: It seems your column pred is already ordinal encoded (values between 0 and 2), automatically switching to use df.categorize\n", " warnings.warn(f'It seems your column {column} is already ordinal encoded (values between {min_value} and {max_value}), automatically switching to use df.categorize')\n" ] } ], "source": [ "import vaex.ml.metrics\n", "from sklearn.linear_model import LogisticRegression\n", "\n", "df = vaex.datasets.iris()\n", "df_train, df_test = df.split_random([0.8, 0.2], random_state=55)\n", "\n", "features = ['petal_length', 'petal_width', 'sepal_length', 'sepal_width']\n", "target = 'class_'\n", "\n", "model = LogisticRegression(random_state=42)\n", 
"vaex_model = Predictor(features=features, target=target, model=model, prediction_name='pred')\n", "\n", "vaex_model.fit(df=df_train)\n", "\n", "df_test = vaex_model.transform(df_test)\n", "\n", "print(df_test.ml.metrics.classification_report(df_test.class_, df_test.pred, average='macro'))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## State transfer - pipelines made easy\n", "\n", "Each `vaex` DataFrame consists of two parts: _data_ and _state_. The _data_ is immutable, and any operation such as filtering, adding new columns, or applying transformers or predictive models just modifies the _state_. This is extremely powerful concept and can completely redefine how we imagine machine learning pipelines. \n", "\n", "As an example, let us once again create a model based on the Iris dataset. Here, we will create a couple of new features, do a PCA transformation, and finally train a predictive model. " ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "ExecuteTime": { "end_time": "2020-07-14T16:10:19.919524Z", "start_time": "2020-07-14T16:10:19.873625Z" } }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
# sepal_length sepal_width petal_length petal_width class_ petal_ratio sepal_ratio PCA_0 PCA_1 PCA_2 PCA_3 PCA_4 PCA_5
0 5.4 3.0 4.5 1.5 1 3.0 1.8 -1.510547480171215 0.3611524321126822 -0.4005106138591812 0.5491844107628985 0.21135370342329635 -0.009542243224854377
1 4.8 3.4 1.6 0.2 0 8.0 1.411764705882353 4.447550641536847 0.2799644730487585 -0.04904458661276928 0.18719360579644695 0.10928493945448532 0.005228919010020094
2 6.9 3.1 4.9 1.5 1 3.266666666666667 2.2258064516129035-1.777649528149752 -0.60828897708458910.48007833550651513 -0.377620118668313350.05174472701894024 -0.04673816474220924
3 4.4 3.2 1.3 0.2 0 6.5 1.375 3.400548263702555 1.437036928591846 -0.3662652846960042 0.23420836198441913 0.05750021481634099 -0.023055011653267066
4 5.6 2.8 4.9 2.0 2 2.45 2.0 -2.32450987662220940.14710673877401348-0.5150809942258257 0.5471824391426298 -0.12154714382375817 0.0044686197532133876
... ... ... ... ... ... ... ... ... ... ... ... ... ...
1155.2 3.4 1.4 0.2 0 6.999999999999999 1.52941176470588253.623794583238953 0.8255759252729563 0.23453320686724874 -0.17599408825208826-0.04687036865354327 -0.02424621891240747
1165.1 3.8 1.6 0.2 0 8.0 1.34210526315789474.42115266246093 0.222875055336637040.4450642830179705 0.2184424557783562 0.14504752606375293 0.07229123907677276
1175.8 2.6 4.0 1.2 1 3.33333333333333352.230769230769231 -1.069062832993727 0.3874258314654399 -0.4471767749236783 -0.2956609879568117 -0.0010695982441835394-0.0065225306610744715
1185.7 3.8 1.7 0.3 0 5.666666666666667 1.50000000000000022.2846521048417037 1.1920826609681359 0.8273738848637026 -0.210489464627257370.03381892388998425 0.018792165273013528
1196.2 2.9 4.3 1.3 1 3.30769230769230752.137931034482759 -1.29882299587484520.06960434514054464-0.0012167985718341268-0.240722552191808830.05282732890885841 -0.032459999314411514
" ], "text/plain": [ "# sepal_length sepal_width petal_length petal_width class_ petal_ratio sepal_ratio PCA_0 PCA_1 PCA_2 PCA_3 PCA_4 PCA_5\n", "0 5.4 3.0 4.5 1.5 1 3.0 1.8 -1.510547480171215 0.3611524321126822 -0.4005106138591812 0.5491844107628985 0.21135370342329635 -0.009542243224854377\n", "1 4.8 3.4 1.6 0.2 0 8.0 1.411764705882353 4.447550641536847 0.2799644730487585 -0.04904458661276928 0.18719360579644695 0.10928493945448532 0.005228919010020094\n", "2 6.9 3.1 4.9 1.5 1 3.266666666666667 2.2258064516129035 -1.777649528149752 -0.6082889770845891 0.48007833550651513 -0.37762011866831335 0.05174472701894024 -0.04673816474220924\n", "3 4.4 3.2 1.3 0.2 0 6.5 1.375 3.400548263702555 1.437036928591846 -0.3662652846960042 0.23420836198441913 0.05750021481634099 -0.023055011653267066\n", "4 5.6 2.8 4.9 2.0 2 2.45 2.0 -2.3245098766222094 0.14710673877401348 -0.5150809942258257 0.5471824391426298 -0.12154714382375817 0.0044686197532133876\n", "... ... ... ... ... ... ... ... ... ... ... ... ... 
...\n", "115 5.2 3.4 1.4 0.2 0 6.999999999999999 1.5294117647058825 3.623794583238953 0.8255759252729563 0.23453320686724874 -0.17599408825208826 -0.04687036865354327 -0.02424621891240747\n", "116 5.1 3.8 1.6 0.2 0 8.0 1.3421052631578947 4.42115266246093 0.22287505533663704 0.4450642830179705 0.2184424557783562 0.14504752606375293 0.07229123907677276\n", "117 5.8 2.6 4.0 1.2 1 3.3333333333333335 2.230769230769231 -1.069062832993727 0.3874258314654399 -0.4471767749236783 -0.2956609879568117 -0.0010695982441835394 -0.0065225306610744715\n", "118 5.7 3.8 1.7 0.3 0 5.666666666666667 1.5000000000000002 2.2846521048417037 1.1920826609681359 0.8273738848637026 -0.21048946462725737 0.03381892388998425 0.018792165273013528\n", "119 6.2 2.9 4.3 1.3 1 3.3076923076923075 2.137931034482759 -1.2988229958748452 0.06960434514054464 -0.0012167985718341268 -0.24072255219180883 0.05282732890885841 -0.032459999314411514" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Load data and split it in train and test sets\n", "df = vaex.datasets.iris()\n", "df_train, df_test = df.ml.train_test_split(test_size=0.2, verbose=False)\n", "\n", "# Create new features\n", "df_train['petal_ratio'] = df_train.petal_length / df_train.petal_width\n", "df_train['sepal_ratio'] = df_train.sepal_length / df_train.sepal_width\n", "\n", "# Do a PCA transformation\n", "features = ['petal_length', 'petal_width', 'sepal_length', 'sepal_width', 'petal_ratio', 'sepal_ratio']\n", "pca = vaex.ml.PCA(features=features, n_components=6)\n", "df_train = pca.fit_transform(df_train)\n", "\n", "# Display the training DataFrame at this stage\n", "df_train" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "At this point, we are ready to train a predictive model. In this example, let's use `LightGBM` with its `scikit-learn` API. 
" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "ExecuteTime": { "end_time": "2020-07-14T16:10:22.228285Z", "start_time": "2020-07-14T16:10:22.152722Z" } }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
# sepal_length sepal_width petal_length petal_width class_ petal_ratio sepal_ratio PCA_0 PCA_1 PCA_2 PCA_3 PCA_4 PCA_5 prediction
0 5.4 3.0 4.5 1.5 1 3.0 1.8 -1.510547480171215 0.3611524321126822 -0.4005106138591812 0.5491844107628985 0.21135370342329635 -0.009542243224854377 1
1 4.8 3.4 1.6 0.2 0 8.0 1.411764705882353 4.447550641536847 0.2799644730487585 -0.04904458661276928 0.18719360579644695 0.10928493945448532 0.005228919010020094 0
2 6.9 3.1 4.9 1.5 1 3.266666666666667 2.2258064516129035-1.777649528149752 -0.60828897708458910.48007833550651513 -0.377620118668313350.05174472701894024 -0.04673816474220924 1
3 4.4 3.2 1.3 0.2 0 6.5 1.375 3.400548263702555 1.437036928591846 -0.3662652846960042 0.23420836198441913 0.05750021481634099 -0.023055011653267066 0
4 5.6 2.8 4.9 2.0 2 2.45 2.0 -2.32450987662220940.14710673877401348-0.5150809942258257 0.5471824391426298 -0.12154714382375817 0.0044686197532133876 2
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1155.2 3.4 1.4 0.2 0 6.999999999999999 1.52941176470588253.623794583238953 0.8255759252729563 0.23453320686724874 -0.17599408825208826-0.04687036865354327 -0.02424621891240747 0
1165.1 3.8 1.6 0.2 0 8.0 1.34210526315789474.42115266246093 0.222875055336637040.4450642830179705 0.2184424557783562 0.14504752606375293 0.07229123907677276 0
1175.8 2.6 4.0 1.2 1 3.33333333333333352.230769230769231 -1.069062832993727 0.3874258314654399 -0.4471767749236783 -0.2956609879568117 -0.0010695982441835394-0.00652253066107447151
1185.7 3.8 1.7 0.3 0 5.666666666666667 1.50000000000000022.2846521048417037 1.1920826609681359 0.8273738848637026 -0.210489464627257370.03381892388998425 0.018792165273013528 0
1196.2 2.9 4.3 1.3 1 3.30769230769230752.137931034482759 -1.29882299587484520.06960434514054464-0.0012167985718341268-0.240722552191808830.05282732890885841 -0.032459999314411514 1
" ], "text/plain": [ "# sepal_length sepal_width petal_length petal_width class_ petal_ratio sepal_ratio PCA_0 PCA_1 PCA_2 PCA_3 PCA_4 PCA_5 prediction\n", "0 5.4 3.0 4.5 1.5 1 3.0 1.8 -1.510547480171215 0.3611524321126822 -0.4005106138591812 0.5491844107628985 0.21135370342329635 -0.009542243224854377 1\n", "1 4.8 3.4 1.6 0.2 0 8.0 1.411764705882353 4.447550641536847 0.2799644730487585 -0.04904458661276928 0.18719360579644695 0.10928493945448532 0.005228919010020094 0\n", "2 6.9 3.1 4.9 1.5 1 3.266666666666667 2.2258064516129035 -1.777649528149752 -0.6082889770845891 0.48007833550651513 -0.37762011866831335 0.05174472701894024 -0.04673816474220924 1\n", "3 4.4 3.2 1.3 0.2 0 6.5 1.375 3.400548263702555 1.437036928591846 -0.3662652846960042 0.23420836198441913 0.05750021481634099 -0.023055011653267066 0\n", "4 5.6 2.8 4.9 2.0 2 2.45 2.0 -2.3245098766222094 0.14710673877401348 -0.5150809942258257 0.5471824391426298 -0.12154714382375817 0.0044686197532133876 2\n", "... ... ... ... ... ... ... ... ... ... ... ... ... ... 
...\n", "115 5.2 3.4 1.4 0.2 0 6.999999999999999 1.5294117647058825 3.623794583238953 0.8255759252729563 0.23453320686724874 -0.17599408825208826 -0.04687036865354327 -0.02424621891240747 0\n", "116 5.1 3.8 1.6 0.2 0 8.0 1.3421052631578947 4.42115266246093 0.22287505533663704 0.4450642830179705 0.2184424557783562 0.14504752606375293 0.07229123907677276 0\n", "117 5.8 2.6 4.0 1.2 1 3.3333333333333335 2.230769230769231 -1.069062832993727 0.3874258314654399 -0.4471767749236783 -0.2956609879568117 -0.0010695982441835394 -0.0065225306610744715 1\n", "118 5.7 3.8 1.7 0.3 0 5.666666666666667 1.5000000000000002 2.2846521048417037 1.1920826609681359 0.8273738848637026 -0.21048946462725737 0.03381892388998425 0.018792165273013528 0\n", "119 6.2 2.9 4.3 1.3 1 3.3076923076923075 2.137931034482759 -1.2988229958748452 0.06960434514054464 -0.0012167985718341268 -0.24072255219180883 0.05282732890885841 -0.032459999314411514 1" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import lightgbm\n", "\n", "features = df_train.get_column_names(regex='^PCA')\n", "\n", "booster = lightgbm.LGBMClassifier()\n", "\n", "vaex_model = Predictor(model=booster, features=features, target='class_')\n", "\n", "vaex_model.fit(df=df_train)\n", "df_train = vaex_model.transform(df_train)\n", "\n", "df_train" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The final `df_train` DataFrame contains all the features we created, including the predictions right at the end. Now, we would like to apply the same transformations to the test set. All we need to do, is to simply extract the _state_ from `df_train` and apply it to `df_test`. 
This will propagate all the changes that were made to the training set on the test set.\n" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "ExecuteTime": { "end_time": "2020-07-14T16:10:25.031158Z", "start_time": "2020-07-14T16:10:24.986397Z" } }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
# sepal_length sepal_width petal_length petal_width class_ petal_ratio sepal_ratio PCA_0 PCA_1 PCA_2 PCA_3 PCA_4 PCA_5 prediction
0 5.9 3.0 4.2 1.5 1 2.80000000000000031.9666666666666668-1.642627940409072 0.49931302910747727 -0.063088008066644660.10842057110641677 -0.03924298664189224-0.0273944397002728221
1 6.1 3.0 4.6 1.4 1 3.28571428571428562.033333333333333 -1.445047446393471 -0.1019091578746504 -0.018990122394938010.0209807676460904080.1614215276667148 -0.02716639637934938 1
2 6.6 2.9 4.6 1.3 1 3.538461538461538 2.2758620689655173-1.330564613235537 -0.419784747491312670.1759590589290671 -0.4631301992308477 0.08304243689815374 -0.0333517336774292741
3 6.7 3.3 5.7 2.1 2 2.71428571428571442.0303030303030303-2.6719170661531013-0.9149428897499291 0.4156162725009377 0.34633692661436644 0.03742964707590906 -0.0132542861962457742
4 5.5 4.2 1.4 0.2 0 6.999999999999999 1.30952380952380953.6322930267831404 0.8198526437905096 1.046277579362938 0.09738737839850209 0.09412658096734221 0.1329137026697501 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
255.5 2.5 4.0 1.3 1 3.07692307692307662.2 -1.25231200886008960.5975071562677784 -0.7019801415469216 -0.11489031841855571-0.036159457820878690.005496321827264977 1
265.8 2.7 3.9 1.2 1 3.25 2.148148148148148 -1.07923521659046570.5236883751378523 -0.34037717939532286-0.23743695029955128-0.00936891422024664-0.02184110533380834 1
274.4 2.9 1.4 0.2 0 6.999999999999999 1.517241379310345 3.7422969192506095 1.048460304741977 -0.636475521315278 0.07623157913054074 0.004215355833312173-0.06354157393133958 0
284.5 2.3 1.3 0.3 0 4.333333333333334 1.956521739130435 1.4537380535696471 2.4197864889383505 -1.0301500321688102 -0.5150263062576134 -0.2631218962099228 -0.06608059456656257 0
296.9 3.2 5.7 2.3 2 2.47826086956521772.15625 -2.963110301521378 -0.924626055589704 0.44833006106219797 0.20994670504662372 -0.2012725506779131 -0.0189004142877193532
" ], "text/plain": [ "# sepal_length sepal_width petal_length petal_width class_ petal_ratio sepal_ratio PCA_0 PCA_1 PCA_2 PCA_3 PCA_4 PCA_5 prediction\n", "0 5.9 3.0 4.2 1.5 1 2.8000000000000003 1.9666666666666668 -1.642627940409072 0.49931302910747727 -0.06308800806664466 0.10842057110641677 -0.03924298664189224 -0.027394439700272822 1\n", "1 6.1 3.0 4.6 1.4 1 3.2857142857142856 2.033333333333333 -1.445047446393471 -0.1019091578746504 -0.01899012239493801 0.020980767646090408 0.1614215276667148 -0.02716639637934938 1\n", "2 6.6 2.9 4.6 1.3 1 3.538461538461538 2.2758620689655173 -1.330564613235537 -0.41978474749131267 0.1759590589290671 -0.4631301992308477 0.08304243689815374 -0.033351733677429274 1\n", "3 6.7 3.3 5.7 2.1 2 2.7142857142857144 2.0303030303030303 -2.6719170661531013 -0.9149428897499291 0.4156162725009377 0.34633692661436644 0.03742964707590906 -0.013254286196245774 2\n", "4 5.5 4.2 1.4 0.2 0 6.999999999999999 1.3095238095238095 3.6322930267831404 0.8198526437905096 1.046277579362938 0.09738737839850209 0.09412658096734221 0.1329137026697501 0\n", "... ... ... ... ... ... ... ... ... ... ... ... ... ... 
...\n", "25 5.5 2.5 4.0 1.3 1 3.0769230769230766 2.2 -1.2523120088600896 0.5975071562677784 -0.7019801415469216 -0.11489031841855571 -0.03615945782087869 0.005496321827264977 1\n", "26 5.8 2.7 3.9 1.2 1 3.25 2.148148148148148 -1.0792352165904657 0.5236883751378523 -0.34037717939532286 -0.23743695029955128 -0.00936891422024664 -0.02184110533380834 1\n", "27 4.4 2.9 1.4 0.2 0 6.999999999999999 1.517241379310345 3.7422969192506095 1.048460304741977 -0.636475521315278 0.07623157913054074 0.004215355833312173 -0.06354157393133958 0\n", "28 4.5 2.3 1.3 0.3 0 4.333333333333334 1.956521739130435 1.4537380535696471 2.4197864889383505 -1.0301500321688102 -0.5150263062576134 -0.2631218962099228 -0.06608059456656257 0\n", "29 6.9 3.2 5.7 2.3 2 2.4782608695652177 2.15625 -2.963110301521378 -0.924626055589704 0.44833006106219797 0.20994670504662372 -0.2012725506779131 -0.018900414287719353 2" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "state = df_train.state_get()\n", "\n", "df_test.state_set(state)\n", "df_test" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "And just like that `df_test` contains all the columns, transformations and the prediction we modelled on the training set. The state can be easily serialized to disk in a form of a JSON file. This makes deployment of a machine learning model as trivial as simply copying a JSON file from one environment to another." ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "ExecuteTime": { "end_time": "2020-07-14T16:10:27.647113Z", "start_time": "2020-07-14T16:10:27.601994Z" } }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
# sepal_length sepal_width petal_length petal_width class_ petal_ratio sepal_ratio PCA_0 PCA_1 PCA_2 PCA_3 PCA_4 PCA_5 prediction
0 5.9 3.0 4.2 1.5 1 2.80000000000000031.9666666666666668-1.642627940409072 0.49931302910747727 -0.063088008066644660.10842057110641677 -0.03924298664189224-0.0273944397002728221
1 6.1 3.0 4.6 1.4 1 3.28571428571428562.033333333333333 -1.445047446393471 -0.1019091578746504 -0.018990122394938010.0209807676460904080.1614215276667148 -0.02716639637934938 1
2 6.6 2.9 4.6 1.3 1 3.538461538461538 2.2758620689655173-1.330564613235537 -0.419784747491312670.1759590589290671 -0.4631301992308477 0.08304243689815374 -0.0333517336774292741
3 6.7 3.3 5.7 2.1 2 2.71428571428571442.0303030303030303-2.6719170661531013-0.9149428897499291 0.4156162725009377 0.34633692661436644 0.03742964707590906 -0.0132542861962457742
4 5.5 4.2 1.4 0.2 0 6.999999999999999 1.30952380952380953.6322930267831404 0.8198526437905096 1.046277579362938 0.09738737839850209 0.09412658096734221 0.1329137026697501 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
255.5 2.5 4.0 1.3 1 3.07692307692307662.2 -1.25231200886008960.5975071562677784 -0.7019801415469216 -0.11489031841855571-0.036159457820878690.005496321827264977 1
265.8 2.7 3.9 1.2 1 3.25 2.148148148148148 -1.07923521659046570.5236883751378523 -0.34037717939532286-0.23743695029955128-0.00936891422024664-0.02184110533380834 1
274.4 2.9 1.4 0.2 0 6.999999999999999 1.517241379310345 3.7422969192506095 1.048460304741977 -0.636475521315278 0.07623157913054074 0.004215355833312173-0.06354157393133958 0
284.5 2.3 1.3 0.3 0 4.333333333333334 1.956521739130435 1.4537380535696471 2.4197864889383505 -1.0301500321688102 -0.5150263062576134 -0.2631218962099228 -0.06608059456656257 0
296.9 3.2 5.7 2.3 2 2.47826086956521772.15625 -2.963110301521378 -0.924626055589704 0.44833006106219797 0.20994670504662372 -0.2012725506779131 -0.0189004142877193532
" ], "text/plain": [ "# sepal_length sepal_width petal_length petal_width class_ petal_ratio sepal_ratio PCA_0 PCA_1 PCA_2 PCA_3 PCA_4 PCA_5 prediction\n", "0 5.9 3.0 4.2 1.5 1 2.8000000000000003 1.9666666666666668 -1.642627940409072 0.49931302910747727 -0.06308800806664466 0.10842057110641677 -0.03924298664189224 -0.027394439700272822 1\n", "1 6.1 3.0 4.6 1.4 1 3.2857142857142856 2.033333333333333 -1.445047446393471 -0.1019091578746504 -0.01899012239493801 0.020980767646090408 0.1614215276667148 -0.02716639637934938 1\n", "2 6.6 2.9 4.6 1.3 1 3.538461538461538 2.2758620689655173 -1.330564613235537 -0.41978474749131267 0.1759590589290671 -0.4631301992308477 0.08304243689815374 -0.033351733677429274 1\n", "3 6.7 3.3 5.7 2.1 2 2.7142857142857144 2.0303030303030303 -2.6719170661531013 -0.9149428897499291 0.4156162725009377 0.34633692661436644 0.03742964707590906 -0.013254286196245774 2\n", "4 5.5 4.2 1.4 0.2 0 6.999999999999999 1.3095238095238095 3.6322930267831404 0.8198526437905096 1.046277579362938 0.09738737839850209 0.09412658096734221 0.1329137026697501 0\n", "... ... ... ... ... ... ... ... ... ... ... ... ... ... 
...\n", "25 5.5 2.5 4.0 1.3 1 3.0769230769230766 2.2 -1.2523120088600896 0.5975071562677784 -0.7019801415469216 -0.11489031841855571 -0.03615945782087869 0.005496321827264977 1\n", "26 5.8 2.7 3.9 1.2 1 3.25 2.148148148148148 -1.0792352165904657 0.5236883751378523 -0.34037717939532286 -0.23743695029955128 -0.00936891422024664 -0.02184110533380834 1\n", "27 4.4 2.9 1.4 0.2 0 6.999999999999999 1.517241379310345 3.7422969192506095 1.048460304741977 -0.636475521315278 0.07623157913054074 0.004215355833312173 -0.06354157393133958 0\n", "28 4.5 2.3 1.3 0.3 0 4.333333333333334 1.956521739130435 1.4537380535696471 2.4197864889383505 -1.0301500321688102 -0.5150263062576134 -0.2631218962099228 -0.06608059456656257 0\n", "29 6.9 3.2 5.7 2.3 2 2.4782608695652177 2.15625 -2.963110301521378 -0.924626055589704 0.44833006106219797 0.20994670504662372 -0.2012725506779131 -0.018900414287719353 2" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_train.state_write('./iris_model.json')\n", "\n", "df_test.state_load('./iris_model.json')\n", "df_test" ] } ], "metadata": { "celltoolbar": "Tags", "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.8" } }, "nbformat": 4, "nbformat_minor": 2 }