{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Digit zero data example\n", "This is an example that illustrates how to synthesize data with `arfpy`.\n", "Each image in the data set is an 8x8 pixel representation of the digit zero." ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [], "source": [ "import warnings\n", "\n", "import pandas as pd\n", "from arfpy import arf\n", "from matplotlib import pyplot as plt\n", "from numpy import random\n", "from sklearn.datasets import load_digits\n", "\n", "warnings.filterwarnings('ignore')  # keep the example output free of library warnings\n", "random.seed(seed=2023)  # seed NumPy's global random generator so the run is repeatable" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "First, we load the data:" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [], "source": [ "digits = load_digits(n_class=1)  # zero digits\n", "# Flatten every image into one row, so that each pixel becomes a column\n", "pixels = digits.images.reshape((len(digits.images), -1))\n", "# Set type from float to category to avoid floats in synthetic data\n", "df = pd.DataFrame(pixels).astype('category')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now we can define the ARF object and estimate the density:" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Initial accuracy is 0.7471910112359551\n", "Iteration number 1 reached accuracy of 0.449438202247191.\n" ] } ], "source": [ "my_arf = arf.arf(x=df)\n", "FORDE = my_arf.forde()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Finally, we can generate some new data:" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [], "source": [ "df_syn = my_arf.forge(n=4)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's plot some generated digits and compare them to the original digits from scikit-learn's digits data set!\n" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAjwAAACyCAYAAABGKhUbAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAALf0lEQVR4nO3cbYil510G8OtfV4k1NjMaqY3WTsZWbUGyZIO2RdlZ4qIFyy7SDcVKMuCHqlAzpcraVpsJoiRSyqr1LYoMmtI2G+ysflAUZRQhFTMwRYoibna3rY2WhJmk6Zt5uf1wTmRYN7ub58zk7Nz7+8HA7nme67nvZ/bmmYv7nNlqrQUAoGcvm/YEAAB2m8IDAHRP4QEAuqfwAADdU3gAgO4pPABA9xQeuEpU1fuq6o92+tzLuFarqtfuxLUAhir/Dw/sPVW1mOQ9Sb47yZNJPpHkva21rSlO64KqqiV5XWvtP6Y9F+DqZYcH9piqek+Se5P8YpLrkrwxyWuS/E1VfcMLZPa9dDN8afV8b8DOUXhgD6mqVyS5O8m7Wmt/1Vp7urV2NsltGZWenxqft1xVD1bV/VX1ZJLF8Wv3b7vW7VV1rqoer6pfqaqzVfUj2/L3j/88N35b6o6q+kxVPVZV7992nR+oqoeqaquqHq2qD79Q8TrvXt5UVU9t+/pqVZ0dH3tZVf1SVZ0ez++BqvqW8+bz01X1mSR/Nz7/l8f384Wq+pOqum58/jXj78Pj4zn+c1W9cgf+OYA9ROGBveXNSa5J8mfbX2ytPZXkL5Mc3vbykSQPJplJ8pHt51fVG5L8bpJ3JHlVRjtF33GJsX8oyfcmuTXJB6rq9ePXn03y7iTXJ3nT+PjPXepGWmsPtdauba1dm2Q2ySeTfHR8+OeTHE1yMMkNSTaT/M55lziY5PVJfjTJ4vjrUJL5JNcm+fD4vDvG9/fqJN+a5GeSfOVS8wP6ovDA3nJ9ksdaa89c4Nij4+PPe6i1ttpae661dv4P+Lcl+YvW2j+21v4nyQeSXOoDfXe31r7SWvtUkk8luSlJWmvrrbVPttaeGe82/UFGZeTF+K0kX0ry/M7RO5O8v7X2udba15IsJ3nbeW9fLbfWvjS+t3ck+VBr7ZFx+XtvkrePz386o6Lz2tbas+P5Pvki5wfscd77hr3lsSTXV9W+C5SeV42PP++zF7nODduPt9a+XFWPX2Ls/9r25y9ntIuSqvqeJB9KckuSl2f0XFm/xLX+T1W9M8lCkje21p4bv/yaJJ+oque2nfpsku1vRW2/vxuSnNv293PjebwyyZ9mtLvzsaqaSXJ/RmXq6cudI7D32eGBveWhJF9L8hPbX6yqb0ryliR/u+3li+3YPJrkO7flvzGjXZAhfi/Jv2X0m1ivSPK+JHU5war64SS/muRIa+2JbYc+m+QtrbWZbV/XtNb+c9s52+/v8xmVpOd9V5Jnkvz3+HNOd7fW3pDRW4I/nuT2F3mPwB6n8MAeMi4Fdyf57ar6sar6+qqaS3Iyyecy2s24HA8meWtVvXn8AeO7c5kl5QK+OaNfjX+qqr4vyc9eTqiqXp3k40lub639+3mHfz/Jr1XVa8bnfltVHbnI5T6a5N1VdWNVXZvk15N8vLX2TFUdqqrvr6qvG8/z6Yx2i4CriMIDe0xr7Tcy2kX5YEY/wP8pox2RW8efd7mca3w6ybuSfCyj3Z4vJvlCRrtHL9YvJPnJ8TX+MKMSczluTfLtSR7c9ptanx4f+80kf57kr6vqixl9oPkHL3KtP86o7P1DkjNJvprR/eX5MTL6Xv1rkr/P6G0t4CriPx4EMt4V2crobakzU54OwI6zwwNXqap6a1W9fPz5nw8m+ZckZ6c7K4DdofDA1etIRh/2/XyS1yV5e7PlC3TKW1oAQPfs8AAA3VN4AIDuKTwAQPcUHgCgewoPANA9hQcA6J7CAwB0T+EBALqn8AAA3VN4AIDuKTwAQPcUHgCgewoPANA9hQcA6J7CAwB
0T+EBALqn8AAA3VN4AIDuKTwAQPcUHgCgewoPANA9hQcA6J7CAwB0T+EBALqn8AAA3VN4AIDuKTwAQPcUHgCge/sucby9JLM4z8mTJyfKHz9+fHD28OHDg7P33HPP4Ozs7Ozg7A6oXb7+VNbR1tbWRPnl5eXB2ZWVlcHZhYWFwdnV1dXB2R2wm+toKmtoY2Njovzi4uLg7Nzc3ODsJGtoaWlpcHYHdPksmmQdJMna2trg7P79+wdnJ3kGTjLuDrjgOrLDAwB0T+EBALqn8AAA3VN4AIDuKTwAQPcUHgCgewoPANA9hQcA6J7CAwB0T+EBALqn8AAA3VN4AIDuKTwAQPcUHgCge9Vau9jxix7cLfPz8xPlz5w5Mzh77NixwdmTJ08Ozj7wwAODs8lk805SEw1+aVNZR0ePHp0of+rUqcHZu+66a3B2ZWVlcHZ5eXlwNkkWFxcnie/mOrrq1tC0TPL8TJK5ublJ4lfss2htbW3woIcOHRqcTZKbbrppcHZpaWlwdpLnycbGxuBskszMzEwSv+A6ssMDAHRP4QEAuqfwAADdU3gAgO4pPABA9xQeAKB7Cg8A0D2FBwDonsIDAHRP4QEAuqfwAADdU3gAgO4pPABA9xQeAKB7+3brwuvr64OzZ86cmWjs06dPD87Oz88Pzh4+fHhwdpLvV5IcO3ZsovyV6uzZs4Ozp06dmmjsO+64Y3B2eXl5cHZra2twdmNjY3C2V2tra4Ozk66hO++8c3B2kjW0f//+wVmuPKurq4Ozc3NzUxl3ZWVlcDZJlpaWJspfiB0eAKB7Cg8A0D2FBwDonsIDAHRP4QEAuqfwAADdU3gAgO4pPABA9xQeAKB7Cg8A0D2FBwDonsIDAHRP4QEAuqfwAADdU3gAgO7t260Lb25uDs7efPPNE409Pz8/UX6oAwcOTGXcns3MzExt7MXFxamMO817ZmedOHFiKuOeO3duKuP2bG1tbWpjz83NTWXcSZ5FN954485NZIfY4QEAuqfwAADdU3gAgO4pPABA9xQeAKB7Cg8A0D2FBwDonsIDAHRP4QEAuqfwAADdU3gAgO4pPABA9xQeAKB7Cg8A0L19u3Xhzc3NwdnDhw/v4ExeOpPc8+zs7A7OpB8bGxvTngJ73MLCwtTG3traGpydmZkZnD148ODg7MrKyuBskiwvL0+U58qxtrY2OLu4uLhj89gpdngAgO4pPABA9xQeAKB7Cg8A0D2FBwDonsIDAHRP4QEAuqfwAADdU3gAgO4pPABA9xQeAKB7Cg8A0D2FBwDonsIDAHRv325deHZ2dnB2fX19B2fy4mxubg7OPvzww4Ozt9122+Bsz/bv3z+1sZ944onB2a2trcHZjY2Nwdnl5eXBWf6/6667bqL8JP8eJ06cGJydZP3Nzc0NznLlmWQtnDt3bnB2ms/uF2KHBwDonsIDAHRP4QEAuqfwAADdU3gAgO4pPABA9xQeAKB7Cg8A0D2FBwDonsIDAHRP4QEAuqfwAADdU3gAgO4pPABA96q1drHjFz14MY888sjQaA4cODA4myT33Xff4OzJkycHZ0+fPj04u76+Pji7A2qXrz94HU1iYWFhGsMmSebm5qYy7srKylTGHdvNdTSVNbS0tDRRfnV1dXD26NGjUxl3Y2NjcDZJZmZmJolfsc+itbW1wYMeOnRocDZJNjc3B2cXFxcHZye5562trcHZHXDBdWSHBwDonsIDAHRP4QEAuqfwAADdU3gAgO4pPABA9xQeAKB7Cg8A0D2FBwDonsIDAHRP4QEAuqfwAADdU3gAgO4pPABA9xQeAKB7+3brwvPz84Oz995770RjHz9+fHD2lltuGZxdX18fnGXnra6uTpRfWloanN3Y2BicXVlZGZxlZ02yBpLJ1sHa2trg7CRraGZmZnC2ZwsLC4OzR44cmWjs2dnZwdmDBw8Ozk6yBq9EdngAgO4pPABA9xQeAKB7Cg8A0D2FBwDonsIDAHRP4QEAuqfwAAD
dU3gAgO4pPABA9xQeAKB7Cg8A0D2FBwDonsIDAHSvWmvTngMAwK6ywwMAdE/hAQC6p/AAAN1TeACA7ik8AED3FB4AoHv/C3wLa7yCYhK8AAAAAElFTkSuQmCC", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjwAAACyCAYAAABGKhUbAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAL4ElEQVR4nO3dbYzlV10H8O9PVnloa6clgNCnkRgkiOxoYiBNkXnhU7Wlg4miQLNjjFh9s1OLfWFIdqoQDSY6JWqCxDpLNajRdLalJT682MZgbCBklkisaGSWYqvUtlNLwQba44u5m4zbbYf9/2e4syefT7JJd+7/e86Ze8/c+825dzvVWgsAQM++ZdoLAADYawoPANA9hQcA6J7CAwB0T+EBALqn8AAA3VN4gCRJVW1U1Q/t0liXV9WXq+oFuzEewFgKD+xjVXVVVf1DVT1eVY9W1Seq6gd2YdzVqnrfbqxxMt7/K0uttS+01s5vrT29W3MAjHFg2gsAzqyqvj3Jx5L8UpK/SPJtSd6c5Klprmu/q6pKUq21Z6a9FmD/cMID+9drkqS19tHW2tOtta+21v6mtfaZqnrh5MTne09dXFUvr6qvVtXLqmq+qr5YVTdV1Zeq6qGq+rnJde9O8s4kN0/edrpr25xzVfWZyYnSn1fVi7aNf01VrVfV5uTU6Q2Tr9+e5PIkd03Gu7mqZquqVdWByTUXV9UfV9WDVfVYVa2d6RuuqhOTMU79aVU1P7ntTZN5NyfXzW/LHa+q91fVJ5J8Jcmrq+rKqvrk5Hv5ZFVdue36xar696p6oqo+X1XvHP4wAecChQf2r88lebqqjlbV1VV10akbWmtPJfmzJO/adv3PJvm71trDk79/R5ILk1yS5OeT/H5VXdRa+8Mkf5rkA5O3na7dNsZPJ/mxJN+Z5A1JFpOkqr4/yW1JfjHJS5N8KMmdVfXC1tr1Sb6Q5NrJeB84w/dye5KXJPmeJC9P8rtn+oZbawcnY5yf5FeS/EuST1fVJUnuTvK+JBcneU+Sv6qql22LX5/k3UkuSPLE5PoPTtb7O0nurqqXVtV5k69f3Vq7IMmVSdbPtB6gHwoP7FOttf9JclWSluTDSR6uqjur6hWTS44meUdVnfo5vj5bxeKUryX59dba11pr9yT5cpLv3mHaD7bWHmytPZrkriRzk6//QpIPtdbum5w2Hc3WW2tv2un7qKpXJrk6yQ2ttccm67l3h8xV2So3b53cD+9Kck9r7Z7W2jOttb9N8qkkP74tttpa+2xr7etJfiTJv7bWbm+tfb219tEk9yc5Ve6eSfL6qnpxa+2h1tpnd/o+gHObwgP7WGvtn1tri621S5O8PsmrkqxMbrsvyZNJ3lJVr03yXUnu3BZ/ZPLif8pXkpy/w5T/+RzXX5HkpsnbSZtVtZnkssl6dnJZkkdba499A9emqi7L1meWDrXWPrdt/p86bf6rkrxyW/SBbf/9qiQnTxv6ZJJLWmtPJnl7khuSPFRVd0/uP6BjCg+cI1pr9ydZzVbxOeVotk4/rk/yl621//1GhzvL6R9I8v7W2sy2Py+ZnJzsNN4DSS6uqpmdJqmqFydZS7LSWvv4aWPcftr857XWfmvbNdvX8GC2StJ2lyf5jyRprf11a+2Hs1WY7s/WCRrQMYUH9qmqeu3kQ8eXTv5+WbY+p/OP2y67PcnbslV6PnIWw/9XklefxfUfTnJDVb2xtpxXVT9RVRfsNF5r7aEkH0/yB1V1UVV9a1X94HPMc1uS+8/wOaA/SXJtVf1oVb2gql40+WD2pc8xzj1JXlNV76iqA1X19iSvS/KxqnpFVb118lmep7L1Vp9/Pg+dU3hg/3oiyRuT3FdVT2ar6PxTkptOXdBa+2KST2frdOPvz2LsP
0ryusnbQ2s7Xdxa+1S2Psfze0keS/JvmXygeeI3k7x3Mt57zjDE9dn6TNH9Sb6UZOk5pvqZJG877V9qvbm19kCS65L8WpKHs3Xi86t5juew1tojSa7J1n31SJKbk1zTWvvvSeambJ0CPZrkLUl+eaf7ADi3VWtne7IN7CdVdVuSB1tr7532WgD2K//jQTiHVdVskp9M8n1TXgrAvuYtLThHVdVvZOstrt9urX1+2usB2M+8pQUAdM8JDwDQPYUHAOiewgMAdE/hAQC6p/AAAN1TeACA7ik8AED3FB4AoHsKDwDQPYUHAOiewgMAdE/hAQC6p/AAAN1TeACA7ik8AED3FB4AoHsKDwDQPYUHAOiewgMAdE/hAQC6p/AAAN1TeACA7ik8AED3FB4AoHsKDwDQPYUHAOiewgMAdE/hAQC6d2CH29s3ZRWnWV1dHZVfWloanJ2ZmRmcHbPu+fn5wdldUHs8/lT20cbGxqj84uLiVOZeX18fnB2zf3fBXu6jc3IPjfm5HvNY3nLLLYOz11133eDsLujyuWisMXthbm5ucHZtbW1wdj8+FznhAQC6p/AAAN1TeACA7ik8AED3FB4AoHsKDwDQPYUHAOiewgMAdE/hAQC6p/AAAN1TeACA7ik8AED3FB4AoHsKDwDQvWqtPd/tz3vj89nY2BgazcLCwuDs2LmXlpYGZ1dWVgZnNzc3B2d3Qe3x+IP30Rjz8/PTmDZJMjs7Ozi7vr4+lewu2Mt9NJU9NOZxTJKTJ08Ozh4+fHhw9vjx44OzHe+h5BzdR2OMeW0Z83q4vLw8OLsLzriPnPAAAN1TeACA7ik8AED3FB4AoHsKDwDQPYUHAOiewgMAdE/hAQC6p/AAAN1TeACA7ik8AED3FB4AoHsKDwDQPYUHAOjegb0a+MSJE3s19I42Nzenkh1jdXV1VH5xcXFX1tGTsY/l0tLS4OyYx2N+fn5w1j56tmPHjg3Onjx5ctTchw8fHpxdWVkZnB3zOG5sbAzOJsns7Oyo/H61vr4+ODt2Hx05cmRwdnl5eXB2YWFhcHbs8+/MzMyo/Jk44QEAuqfwAADdU3gAgO4pPABA9xQeAKB7Cg8A0D2FBwDonsIDAHRP4QEAuqfwAADdU3gAgO4pPABA9xQeAKB7Cg8A0D2FBwDo3oG9GviOO+4YnJ2fn9+9hZylmZmZwdn19fXB2ePHjw/OJsni4uKo/H415n45ceLEqLnHPJ5jrKysDM6urq7u2jpIDh06NCo/5rEcY8zzwdg9tLy8PCq/X21ubk5t7mndp2OeA9fW1kbNvRevaU54AIDuKTwAQPcUHgCgewoPANA9hQcA6J7CAwB0T+EBALqn8AAA3VN4AIDuKTwAQPcUHgCgewoPANA9hQcA6J7CAwB078BeDTw3Nzc4O+ZX0k/T5ubm4OzMzMyuraMnY/bCwYMHR829srIyKj/U0tLS4OyYn7teXXHFFYOzGxsbu7eQb6LHH3982kugA2Ne0/bjz44THgCgewoPANA9hQcA6J7CAwB0T+EBALqn8AAA3VN4AIDuKTwAQPcUHgCgewoPANA9hQcA6J7CAwB0T+EBALqn8AAA3Tsw7QWcydhfKz8mPzs7Ozg7Pz8/OLu+vj4427OFhYXB2XP1Ph2zj3i2kydPDs7ee++9u7iSc8Px48envYR9aWZmZtpLGGTM80lvz79OeACA7ik8AED3FB4AoHsKDwDQPYUHAOiewgMAdE/hAQC6p/AAAN1TeACA7ik8AED3FB4AoHsKDwDQPYUHAOiewgMAdO/AXg085lfS33jjjaPmXlxcnEp2fX19cHZ5eXlwtmezs7NTySbj9vCYudfW1gZn7aNnO3jw4ODshRdeOGrupaWlwdm5ubnB2TH7YMyaezbm8RizB5OkqqYy99GjRwdnDx8+PDi7V5zwAADdU3gAgO4pPABA9xQeAKB7Cg8A0D2FBwDonsIDAHRP4QEAuqfwAADdU3gAgO4pPABA9xQeA
KB7Cg8A0D2FBwDonsIDAHSvWmvPd/vz3rhXlpaWRuVvvfXW3VnIWTp06NDg7Orq6u4t5OzVHo8/lX107NixUfkjR44Mzm5ubg7OLiwsDM6urKwMzu6CvdxHU9lD6+vro/JjHsu5ubnB2TH7YHZ2dnB2F3T5XLSxsTEqv7i4OJW5x7wuzc/PD87ugjPuIyc8AED3FB4AoHsKDwDQPYUHAOiewgMAdE/hAQC6p/AAAN1TeACA7ik8AED3FB4AoHsKDwDQPYUHAOiewgMAdE/hAQC6V621aa8BAGBPOeEBALqn8AAA3VN4AIDuKTwAQPcUHgCgewoPANC9/wORpdp/BmmQbQAAAABJRU5ErkJggg==", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "fig, axes = plt.subplots(nrows=1, ncols=4, figsize=(10, 3))\n", "for ax, image in zip(axes, df):\n", " ax.set_axis_off()\n", " image = df.iloc[image,].to_numpy().reshape(8,8)\n", " ax.imshow(image, cmap=plt.cm.gray_r, interpolation=\"nearest\")\n", "fig.suptitle(\"Original zeros\")\n", "fig, axes = plt.subplots(nrows=1, ncols=4, figsize=(10, 3))\n", "fig.suptitle(\"Synthetic zeros\")\n", "for ax, image in zip(axes, df_syn):\n", " ax.set_axis_off()\n", " image = df_syn.iloc[image,].to_numpy().reshape(8,8)\n", " ax.imshow(image, cmap=plt.cm.gray_r, interpolation=\"nearest\")\n" ] } ], "metadata": { "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.8" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }