Commit c0bbd4e8 authored by Mahesha999's avatar Mahesha999

Refactoring code and randomized search cv for random forest hyperparams

parents
This source diff could not be displayed because it is too large. You can view the blob instead.
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
def elbow(X_train, max_clusters=19):
    """Plot the k-means elbow curve for ``X_train``.

    Fits one ``KMeans`` model for every cluster count from 1 to
    ``max_clusters`` (inclusive), records each model's inertia and shows
    inertia against cluster count in a matplotlib figure.

    Args:
        X_train: Feature matrix passed unchanged to ``KMeans.fit``.
        max_clusters: Largest cluster count to try. Defaults to 19, the
            upper bound that was previously hard-coded.
    """
    cluster_counts = range(1, max_clusters + 1)
    wcss = []
    for k in cluster_counts:
        kmeans = KMeans(n_clusters=k, init='k-means++', max_iter=300, random_state=0, n_init=10)
        kmeans.fit(X_train)
        # inertia_: sum of squared distances of samples to their closest cluster center.
        wcss.append(kmeans.inertia_)
    plt.plot(cluster_counts, wcss)
    plt.title("The Elbow Method")
    # The cluster count is plotted on the x axis and WCSS on the y axis.
    plt.xlabel("no. of clusters")
    plt.ylabel("WCSS")
    plt.show()
def kmeans_fit_predict(X_train, X_test, n_clusters=10):
    """Fit k-means on ``X_train`` and return cluster ids for ``X_test``.

    Args:
        X_train: Feature matrix used to fit the clustering.
        X_test: Feature matrix whose rows are assigned to the fitted clusters.
        n_clusters: Number of clusters to fit. Defaults to 10, the value
            that was previously hard-coded, so existing callers are unaffected.

    Returns:
        The result of ``KMeans.predict(X_test)``.
    """
    kmeans = KMeans(n_clusters=n_clusters, init="k-means++", max_iter=1000, random_state=0, n_init=10)
    kmeans.fit(X_train)
    return kmeans.predict(X_test)
\ No newline at end of file
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from util import *"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Load data"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"X_train, X_test, y_train, y_test = load_scale_xy_with_25p_split()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Random Forest"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from random_forest import *"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Search for best hyperparameters by doing randomized cross validation"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fitting 3 folds for each of 100 candidates, totalling 300 fits\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n",
"[Parallel(n_jobs=-1)]: Done 25 tasks | elapsed: 2.0min\n",
"[Parallel(n_jobs=-1)]: Done 146 tasks | elapsed: 10.4min\n",
"[Parallel(n_jobs=-1)]: Done 300 out of 300 | elapsed: 21.7min finished\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy: 74.8\n",
"Hyperparamters: {'n_estimators': 1000, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_features': 'auto', 'max_depth': 50, 'bootstrap': False}\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAPoAAAECCAYAAADXWsr9AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAALXklEQVR4nO3d3Ytd5RmG8fvOnokxSWMsrbTJBBPQpqRCjQyiBqQkQmsVPWgpFhQUYeiXRhFEe+IfUBE9KJZprD0w1YOYgyKt2qIeSGns5AM1GYup2iQaMYXWkZg4H3l6MCMkM2P2Gme9s/b2uX4gZBbbNw8rc2XtvWftN44IAfhiW9T0AADKI3QgAUIHEiB0IAFCBxIgdCCBxkK3/T3b/7R90PZ9Tc1Rle01tl+0fcD2fttbm56pCtst23ttP9P0LFXYXml7h+03bA/bvrLpmdqxfffU98Trtp+0vaTpmaZrJHTbLUm/lnStpA2Sfmx7QxOzzMG4pHsiYoOkKyT9vAtmlqStkoabHmIOHpH0bER8U9K31eGz214t6U5J/RFxiaSWpJuanWqmpq7ol0s6GBFvRcSopKck3djQLJVExNGI2DP16480+Q24utmpzs52n6TrJG1repYqbJ8n6WpJj0lSRIxGxP8aHaqaHknn2u6RtFTSew3PM0NToa+WdPi0r4+ow6M5ne21kjZK2tXwKO08LOleSacanqOqdZKOSXp86uXGNtvLmh7qbCLiXUkPSjok6aikDyPi+Wanmok34+bI9nJJT0u6KyJGmp7ns9i+XtIHEbG76VnmoEfSZZIejYiNko5L6uj3b2yfr8lno+skrZK0zPbNzU41U1OhvytpzWlf900d62i2ezUZ+faI2Nn0PG1sknSD7Xc0+dJos+0nmh2prSOSjkTEp8+Udmgy/E52jaS3I+JYRIxJ2inpqoZnmqGp0P8h6WLb62wv1uSbF39saJZKbFuTrx2HI+KhpudpJyLuj4i+iFiryfP7QkR03JXmdBHxvqTDttdPHdoi6UCDI1VxSNIVtpdOfY9sUQe+gdjTxG8aEeO2fyHpOU2+S/m7iNjfxCxzsEnSLZJes71v6tgvI+JPzY30hXSHpO1TF4C3JN3W8DxnFRG7bO+QtEeTP5nZK2mw2almMh9TBb74eDMOSIDQgQQIHUiA0IEECB1IoPHQbQ80PcNcdNu8EjMvhE6ft/HQJXX0CZpFt80rMfNC6Oh5OyF0AIUVuWGmZ8XS6L1gZaXHjo98rJ4VSys9tvdfJ+cx1dm5Ve3vvNFTJ7V4UfV9BWKi+Q+OjekT9eqcyo+vei5K6pTzXOr7QpLUan2Oic7uxPiIRidOePrxIrfA9l6wUmt/Vf8zmb4flLtLtrV8RZF1J0Y69gNun6nUuSip1HkueS58Xv1r/+39P8x6vPm/ugEUR+hAAoQOJEDoQAKEDiRQKfRu24MdwJnaht6le7ADOE2VK3rX7cEO4ExVQu/qPdgB1PhmnO0B20O2h8ZHPq5rWQA1qBJ6pT3YI2IwIvojor/qvesAFkaV0LtuD3YAZ2r7oZYu3YMdwGkqfXpt6h8p4B8qALoUd8YBCRA6kAChAwkQOpAAoQMJFNkzbvE7o7rwtsPtHzhHw7+5vPY1P/WNn7xSbO1uU2IvM0kaP3ykyLolFd3zr8DaEWOzHueKDiRA6EAChA4kQOhAAoQOJEDoQAKEDiRA6EAChA4kQOhAAoQOJEDoQAKEDiRA6EAChA4kQOhAAoQOJEDoQAKEDiRA6EAChA4kQOhAAkW2e44l52hiw9ra1y25JfP1+/9bZN0/X3lhkXWlwlsRo7jWt9bXvqYPvjzrca7oQAKEDiRA6EAChA4kQOhAAoQOJEDoQAJtQ7e9xvaLtg/Y3m9760IMBqA+VW6YGZd0T0Tssf0lSbtt/yUiDhSeDUBN2l7RI+JoROyZ+vVHkoYlrS49GID6zOk1uu21kjZK2lVkGgBFVA7d9nJJT0
u6KyJm3GRte8D2kO2hsbHjdc4IYJ4qhW67V5ORb4+InbM9JiIGI6I/Ivp7e5fVOSOAearyrrslPSZpOCIeKj8SgLpVuaJvknSLpM2290399/3CcwGoUdsfr0XEy5K8ALMAKIQ744AECB1IgNCBBAgdSIDQgQSK7AKr4yekv79aZOlSSu3WOrB7T5F1Jem3m79TZN3xw0eKrNuNWitWlFv88NH61xwbm/UwV3QgAUIHEiB0IAFCBxIgdCABQgcSIHQgAUIHEiB0IAFCBxIgdCABQgcSIHQgAUIHEiB0IAFCBxIgdCABQgcSIHQgAUIHEiB0IAFCBxIost2zF/eq52t9ta9bchviiZGRIus+evFFRdaVpEv3ljkf+zYWWbbo1sml/vxOXn5xkXUlackrbxZbezqu6EAChA4kQOhAAoQOJEDoQAKEDiRA6EAClUO33bK91/YzJQcCUL+5XNG3ShouNQiAciqFbrtP0nWStpUdB0AJVa/oD0u6V9KpcqMAKKVt6Lavl/RBROxu87gB20O2h0YnTtQ2IID5q3JF3yTpBtvvSHpK0mbbT0x/UEQMRkR/RPQvbp1b85gA5qNt6BFxf0T0RcRaSTdJeiEibi4+GYDa8HN0IIE5fR49Il6S9FKRSQAUwxUdSIDQgQQIHUiA0IEECB1IoMgusDE6VmTH1p419e8sW1rJnWtL7db60zcPFlm35I64pfT+9aw3hM5PwV1xp+OKDiRA6EAChA4kQOhAAoQOJEDoQAKEDiRA6EAChA4kQOhAAoQOJEDoQAKEDiRA6EAChA4kQOhAAoQOJEDoQAKEDiRA6EAChA4kUGQX2FJK7qhaSqvgTp8TIyNF1i21W+tz7+0rsq4kfXfVpUXWLbnzcInv54hTsx7nig4kQOhAAoQOJEDoQAKEDiRA6EAChA4kUCl02ytt77D9hu1h21eWHgxAfareMPOIpGcj4oe2F0taWnAmADVrG7rt8yRdLelWSYqIUUmjZccCUKcqT93XSTom6XHbe21vs72s8FwAalQl9B5Jl0l6NCI2Sjou6b7pD7I9YHvI9tCYPql5TADzUSX0I5KORMSuqa93aDL8M0TEYET0R0R/r86pc0YA89Q29Ih4X9Jh2+unDm2RdKDoVABqVfVd9zskbZ96x/0tSbeVGwlA3SqFHhH7JPWXHQVAKdwZByRA6EAChA4kQOhAAoQOJEDoQAJdtd1zNyq1JXM3KrUlsyT9/tDLRda9/drbi6y70LiiAwkQOpAAoQMJEDqQAKEDCRA6kAChAwkQOpAAoQMJEDqQAKEDCRA6kAChAwkQOpAAoQMJEDqQAKEDCRA6kAChAwkQOpAAoQMJFNkF1q1Fai1fUfu67Kh6ptaK+s+x1J3n+dYf/azIugfvaxVZV5IuuqXY0jNwRQcSIHQgAUIHEiB0IAFCBxIgdCABQgcSqBS67btt77f9uu0nbS8pPRiA+rQN3fZqSXdK6o+ISyS1JN1UejAA9an61L1H0rm2eyQtlfReuZEA1K1t6BHxrqQHJR2SdFTShxHxfOnBANSnylP38yXdKGmdpFWSltm+eZbHDdgesj00eupk/ZMC+NyqPHW/RtLbEXEsIsYk7ZR01fQHRcRgRPRHRP/iRbxXB3SSKqEfknSF7aW2LWmLpOGyYwGoU5XX6Lsk7ZC0R9JrU//PYOG5ANSo0ufRI+IBSQ8UngVAIdwZByRA6EAChA4kQOhAAoQOJEDoQAJFtntWb6+05uv1r7u/3DbEPWv6iqw7vvrLRdaVpIm/v1pk3VLnIj4s9+dX6lysP1BmS21J0oural/SA72zHueKDiRA6EAChA4kQOhAAoQOJEDoQAKEDiRA6EAChA4kQOhAAoQOJEDoQAKEDiRA6EAChA4kQOhAAoQOJEDoQAKEDiRA6EAChA4k4Iiof1H7mKR/V3z4VyT9p/Yhyum2eSVmXgidMu+FEfHV6QeLhD4Xtocior/RIeag2+aVmHkhdPq8PHUHEi
B0IIFOCH2w6QHmqNvmlZh5IXT0vI2/RgdQXidc0QEURuhAAoQOJEDoQAKEDiTwfzhZngIsUMbSAAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 288x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"search_fit_predict_print(randomized_search_cv_rf_fit_predict, X_train, y_train, X_test, y_test)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Use hyperparameters obtained with randomized search cross validation"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy: 76.0\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAPoAAAECCAYAAADXWsr9AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAALR0lEQVR4nO3dXWjd9R3H8c/HPPRJU4WNiU2xBV1HFWYliFpwWytsTtGLjeFAmSKUPWkVQXQ3wq5F9GI4sjp3YdGLWtiQoW44L4RZlz6gtvGhU9dWK3YXa7RUk6bfXeSIbZr1/LP8f/mf0+/7BUJzPP35Jead3zkn//OLI0IAzmxnNT0AgPIIHUiA0IEECB1IgNCBBAgdSKCx0G1/z/Zbtvfavr+pOaqyvdz232zvsb3b9samZ6rCdo/tnbafbXqWKmyfa3uL7Tdtj9q+qumZ2rF9T+tr4g3bT9le2PRM0zUSuu0eSb+RdJ2k1ZJ+bHt1E7PMwjFJ90bEaklXSvpFF8wsSRsljTY9xCw8Kum5iPiGpG+qw2e3vUzSXZKGIuJSST2Sbm52qlM1taNfIWlvRLwbEeOSnpZ0U0OzVBIRByNiR+vPn2jqC3BZs1Odnu1BSddL2tT0LFXYXirpGkmPS1JEjEfEfxodqppeSYts90paLOnDhuc5RVOhL5O0/4SPD6jDozmR7RWS1kja1vAo7Twi6T5Jxxueo6qVkg5JeqL1dGOT7SVND3U6EfGBpIck7ZN0UNLhiHih2alOxYtxs2T7bEnPSLo7Isaanud/sX2DpI8jYnvTs8xCr6TLJT0WEWskHZHU0a/f2D5PU49GV0q6QNIS27c0O9Wpmgr9A0nLT/h4sHVbR7Pdp6nIN0fE1qbnaWOtpBttv6+pp0brbD/Z7EhtHZB0ICK+eKS0RVPhd7JrJb0XEYciYkLSVklXNzzTKZoK/R+SLra90na/pl68+FNDs1Ri25p67jgaEQ83PU87EfFARAxGxApNfX5fjIiO22lOFBEfSdpve1XrpvWS9jQ4UhX7JF1pe3Hra2S9OvAFxN4m/qMRccz2LyU9r6lXKX8fEbubmGUW1kq6VdLrtne1bvtVRPy5uZHOSHdK2tzaAN6VdHvD85xWRGyzvUXSDk39ZGanpOFmpzqVeZsqcObjxTggAUIHEiB0IAFCBxIgdCCBxkO3vaHpGWaj2+aVmHk+dPq8jYcuqaM/QTPotnklZp4PHT1vJ4QOoLAiF8z0L10Ui84fqHTf8cNH1b90UaX7xtsTcxmrFhP6XH1a0PQYszLbmd3T/Pf/8eOfqf+s6uc3xGSZN+hV/VzMdt5Sjh7/VOPHP/P024tcArvo/AFdNVz/e+8nv9Nxb/M9I/WcXe2bdCeZHCvzRsJu+1z8/dM/znh789+6ARRH6EAChA4kQOhAAoQOJFAp9G47gx3AydqG3qVnsAM4QZUdvevOYAdwsiqhd/UZ7ABqfDHO9gbbI7ZHxg8frWtZADWoEnqlM9gjYjgihiJiqOq16wDmR5XQu+4MdgAna/umli49gx3ACSq9e631Swr4RQVAl+LKOCABQgcSIHQgAUIHEiB0IIEyvzb5n5PSTZ/Wvuzbv72i9jW/8PWfvlps7W7jpWXOSTu2/0CRdUsqdRZdKREzH5LJjg4kQOhAAoQOJEDoQAKEDiRA6EAChA4kQOhAAoQOJEDoQAKEDiRA6EAChA4kQOhAAoQOJEDoQAKEDiRA6EAChA4kQOhAAoQOJEDoQAJFjnuOhQs0uXpF7euWPJL5sp1l1n3jxsEyC6s7j0/Gl3ouWVX7mt778oy3s6MDCRA6kAChAwkQOpAAoQMJEDqQAKEDCbQN3fZy23+zvcf2btsb52MwAPWpcsHMMUn3RsQO2+dI2m77LxGxp/BsAGrSdkePiIMRsaP1508kjUpaVnowAPWZ1XN02yskrZG0rcg0AIqoHLrtsyU9I+nuiBib4d9vsD1ie2Ri4kidMwKYo0
qh2+7TVOSbI2LrTPeJiOGIGIqIob6+JXXOCGCOqrzqbkmPSxqNiIfLjwSgblV29LWSbpW0zvau1j/fLzwXgBq1/fFaRLwsyfMwC4BCuDIOSIDQgQQIHUiA0IEECB1IoMgpsDpyVHrltSJLl/L6twaKrLth+0tF1pWk3637dpF1OV32Sz0DZb4uJEn7D9a/5sTEjDezowMJEDqQAKEDCRA6kAChAwkQOpAAoQMJEDqQAKEDCRA6kAChAwkQOpAAoQMJEDqQAKEDCRA6kAChAwkQOpAAoQMJEDqQAKEDCRA6kECR457d36fe8wdrX7fkMcSTY2NF1n3s4ouKrCtJl+0s8/nYtabIskWPTi71/++zKy4usq4kLXz1nWJrT8eODiRA6EAChA4kQOhAAoQOJEDoQAKEDiRQOXTbPbZ32n625EAA6jebHX2jpNFSgwAop1LotgclXS9pU9lxAJRQdUd/RNJ9ko6XGwVAKW1Dt32DpI8jYnub+22wPWJ7ZHzyaG0DApi7Kjv6Wkk32n5f0tOS1tl+cvqdImI4IoYiYqi/Z1HNYwKYi7ahR8QDETEYESsk3SzpxYi4pfhkAGrDz9GBBGb1fvSIeEnSS0UmAVAMOzqQAKEDCRA6kAChAwkQOpBAkVNgY3yiyImtPZesqn3NL3jsSJF1S55cW+q01p+9s7fIuiVPxC2l76+nvSB0TiYLrBkx81Xq7OhAAoQOJEDoQAKEDiRA6EAChA4kQOhAAoQOJEDoQAKEDiRA6EAChA4kQOhAAoQOJEDoQAKEDiRA6EAChA4kQOhAAoQOJEDoQAJFToEtZXL3W02PMGs9AwPF1p4cGyuybqnTWp//cFeRdSXpuxdcVmTd3uWDRdaVyp4QPB07OpAAoQMJEDqQAKEDCRA6kAChAwkQOpBApdBtn2t7i+03bY/avqr0YADqU/WCmUclPRcRP7TdL2lxwZkA1Kxt6LaXSrpG0m2SFBHjksbLjgWgTlUeuq+UdEjSE7Z32t5ke0nhuQDUqErovZIul/RYRKyRdETS/dPvZHuD7RHbIxP6vOYxAcxFldAPSDoQEdtaH2/RVPgniYjhiBiKiKE+LahzRgBz1Db0iPhI0n7bq1o3rZe0p+hUAGpV9VX3OyVtbr3i/q6k28uNBKBulUKPiF2ShsqOAqAUrowDEiB0IAFCBxIgdCABQgcSIHQgga467rkblTqSuRuVOpJZkv6w7+Ui695x3R1F1p1v7OhAAoQOJEDoQAKEDiRA6EAChA4kQOhAAoQOJEDoQAKEDiRA6EAChA4kQOhAAoQOJEDoQAKEDiRA6EAChA4kQOhAAoQOJEDoQAKcAtvFegYGiqzbjSfX3vajnxdZ98CvJ4usK0mDPyi29CnY0YEECB1IgNCBBAgdSIDQgQQIHUiA0IEEKoVu+x7bu22/Yfsp2wtLDwagPm1Dt71M0l2ShiLiUkk9km4uPRiA+lR96N4raZHtXkmLJX1YbiQAdWsbekR8IOkhSfskHZR0OCJeKD0YgPpUeeh+nqSbJK2UdIGkJbZvmeF+G2yP2B6Z0Of1Twrg/1blofu1kt6LiEMRMSFpq6Srp98pIoYjYigihvq0oO45AcxBldD3SbrS9mLblrRe0mjZsQDUqcpz9G2StkjaIen11t8ZLjwXgBpVej96RDwo6cHCswAohCvjgAQIHUiA0IEECB1IgNCBBAgdSKDIcc9euEA9F62qf+H9B+tfs8VLyxydHANLiqwrqejno4RSx1NL0uQrrxVZ98Lby838tVfOqX3N/p/MvHezowMJEDqQAKEDCRA6kAChAwkQOpAAoQMJEDqQAKEDCRA6kAChAwkQOpAAoQMJEDqQAKEDCRA6kAChAwkQOpAAoQMJEDqQAKEDCTgi6l/UPiTpXxXv/hVJ/659iHK6bV6JmedDp8x7YUR8dfqNRUKfDdsjETHU6BCz0G3zSsw8Hzp9Xh66AwkQOpBAJ4Q+3PQAs9Rt80rMPB86et7Gn6
MDKK8TdnQAhRE6kAChAwkQOpAAoQMJ/Bdo15FvGsWfVAAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 288x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"fit_predict_print(rf_fit_predict, X_train, y_train, X_test, y_test)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# K-Means"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from k_means import *\n",
"X, y = load_scale_x_encode_y()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Find the number of clusters by the elbow method"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"elbow(X)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let number of clusters be 6. "
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy: 3.3000000000000003\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAPoAAAECCAYAAADXWsr9AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAAMFUlEQVR4nO3d24uc9R3H8c8ne3DdJNXUIzmgUcQDFhNZazRgwQjWKnrRFlQUlEJurGdqtRT8AxTRCxFirJQqehEtiBQPrXpRtKlrDB6ytlq1MTHixnqIMXFP317sBmOyOs+Y55dn1u/7BYHsZPz6ZZg3z8zszDOOCAH4fpvV9AIAyiN0IAFCBxIgdCABQgcSIHQggcZCt/1T2/+y/Zbtm5vaoyrbi2w/a3uD7ddtX9v0TlXY7rL9su3Hm96lCtsH215j+w3bQ7bPaHqnVmxfP3WfeM32Q7b7mt5pT42EbrtL0t2SzpN0kqRLbJ/UxC5tGJN0Y0ScJGmZpKtmwM6SdK2koaaXaMNdkp6IiBMknaIO3932AknXSBqIiJMldUm6uNmt9tbUEf3Hkt6KiLcjYkTSw5IuamiXSiJiS0Ssm/r7Nk3eARc0u9W3s71Q0vmSVje9SxW2D5J0lqT7JCkiRiLik0aXqqZb0oG2uyX1S3q/4X320lToCyS9t9vPm9Th0ezO9tGSlkpa2/Aqrdwp6SZJEw3vUdViScOS7p96urHa9uyml/o2EbFZ0u2SNkraIunTiHiq2a32xotxbbI9R9Ijkq6LiM+a3ueb2L5A0ocR8VLTu7ShW9Kpku6JiKWStkvq6NdvbM/T5KPRxZLmS5pt+7Jmt9pbU6FvlrRot58XTl3W0Wz3aDLyByPi0ab3aWG5pAttv6vJp0Zn236g2ZVa2iRpU0TseqS0RpPhd7JzJL0TEcMRMSrpUUlnNrzTXpoK/UVJx9lebLtXky9ePNbQLpXYtiafOw5FxB1N79NKRNwSEQsj4mhN3r7PRETHHWl2FxEfSHrP9vFTF62QtKHBlarYKGmZ7f6p+8gKdeALiN1N/E8jYsz2ryU9qclXKf8QEa83sUsblku6XNKrttdPXfa7iPhLcyt9L10t6cGpA8Dbkq5seJ9vFRFrba+RtE6Tv5l5WdKqZrfam/mYKvD9x4txQAKEDiRA6EAChA4kQOhAAo2Hbntl0zu0Y6btK7Hz/tDp+zYeuqSOvoGmMdP2ldh5f+jofTshdACFFXnDTK8PiD5V+9DRqL5Ujw6odN0vF5b7INMBn1T7gNfI6Hb19lTfwztGvutKLcX4eKXrtXMbl+SursrXHYmd6m3j/A0jh5Q510PPh9srXe+73Mbt3B5V7ZjYppGJnd7z8iJvge3TbJ3uFbXP/c915U42svjxnUXm9rzybpG5kjT+8cfFZpfQddC8YrM3X3pikblH3vV8kblSmdvjhU//PO3lPHQHEiB0IAFCBxIgdCABQgcSqBT6TDsHO4Cvaxn6DD0HO4DdVDmiz7hzsAP4uiqhz+hzsAOo8Z1xU5/eWSlJfeqvayyAGlQ5olc6B3tErIqIgYgY6IT3VQP4SpXQZ9w52AF8XcuH7jP0HOwAdlPpOfrUlxTwRQXADMU744AECB1IgNCBBAgdSIDQgQQa+drk7+rY37zQ9Aptm+gudxO70OwYGysy983fnlBkriQd+/t/FpnruXOLzJWkzfcfWfvMkRt6pr2cIzqQAKEDCRA6kAChAwkQOpAAoQMJEDqQAKEDCRA6kAChAwkQOpAAoQMJEDqQAKEDCRA6kAChAwkQOpAAoQMJEDqQAKEDCRA6kAChAwmUOV/wnAMVS5bUPvbzhX21z9zlB48MFpnbtWhBkbmSNHbEQUXmenBDkbnH/XFrkbmS9NElpxWZe/Cfyp1ifP7lm2qfufHzkWkv54gOJEDoQAKEDiRA6EAChA4kQOhAAoQOJNAydNuLbD9re4Pt121fuz8WA1CfKm+YGZN0Y0Sssz
1X0ku2n46IMu+qAFC7lkf0iNgSEeum/r5N0pCkcm/3AlC7tp6j2z5a0lJJa4tsA6CIyqHbniPpEUnXRcRn0/z7StuDtgdHR7fXuSOAfVQpdNs9moz8wYh4dLrrRMSqiBiIiIGentl17ghgH1V51d2S7pM0FBF3lF8JQN2qHNGXS7pc0tm210/9+VnhvQDUqOWv1yLi75K8H3YBUAjvjAMSIHQgAUIHEiB0IAFCBxIochbY0dmztOXM/trnzr/t+dpn7jL+k6VF5vqjHUXmSpLHJorMjbGxInPHh94sMleS4vRDi80uJXbUf9+ImP4+wREdSIDQgQQIHUiA0IEECB1IgNCBBAgdSIDQgQQIHUiA0IEECB1IgNCBBAgdSIDQgQQIHUiA0IEECB1IgNCBBAgdSIDQgQQIHUiA0IEEipzuuffTMS16fLj2ueO1T/xK77+3FJm740cLi8yVpImuMt99+fmvzigy95D7XigyV5IO3Fry3lFGkdNqx/QXc0QHEiB0IAFCBxIgdCABQgcSIHQgAUIHEqgcuu0u2y/bfrzkQgDq184R/VpJQ6UWAVBOpdBtL5R0vqTVZdcBUELVI/qdkm6SNFFuFQCltAzd9gWSPoyIl1pcb6XtQduDI+Nf1LYggH1X5Yi+XNKFtt+V9LCks20/sOeVImJVRAxExEBvV3/NawLYFy1Dj4hbImJhRBwt6WJJz0TEZcU3A1Abfo8OJNDW59Ej4jlJzxXZBEAxHNGBBAgdSIDQgQQIHUiA0IEEipwFNrpnafTwObXP7d40t/aZu4wPby0yt+epD4rMlaTuRWXOMNv3dJmdtxY6u6wkzXtrZ5G5s+aWu8999POTa585/tg/pr2cIzqQAKEDCRA6kAChAwkQOpAAoQMJEDqQAKEDCRA6kAChAwkQOpAAoQMJEDqQAKEDCRA6kAChAwkQOpAAoQMJEDqQAKEDCRA6kIAjovahP5j1w1jWfW7tc2NsrPaZuzz5/voic887ZlmRuVK526Pk7VzKrCUnFZk7sX5DkbmlrI2/6bP4n/e8nCM6kAChAwkQOpAAoQMJEDqQAKEDCRA6kECl0G0fbHuN7TdsD9ku97WYAGpX9WuT75L0RET8wnavpP6COwGoWcvQbR8k6SxJV0hSRIxIGim7FoA6VXnovljSsKT7bb9se7Xt2YX3AlCjKqF3SzpV0j0RsVTSdkk373kl2yttD9oeHI0va14TwL6oEvomSZsiYu3Uz2s0Gf7XRMSqiBiIiIEeH1DnjgD2UcvQI+IDSe/ZPn7qohWSZtZHeoDkqr7qfrWkB6decX9b0pXlVgJQt0qhR8R6SQNlVwFQCu+MAxIgdCABQgcSIHQgAUIHEiB0IIGqv0dvy9ihs/XhL0+rfe7h975Y+8xdzp2/pNDknYXmSrF8SZG5b19VZKyOvXR9mcEqd1rmrnnzisyVpIlt2+of+g1n6uaIDiRA6EAChA4kQOhAAoQOJEDoQAKEDiRA6EAChA4kQOhAAoQOJEDoQAKEDiRA6EAChA4kQOhAAoQOJEDoQAKEDiRA6EAChA4kUOQssD0f7dARD7xW+9xZhx1a+8xdJj7+pMjcjTfs9VXyten9rMzcY+7+osjcWXPnFpkrSe7rKzJ3fHi4yFxJcneR/KbFER1IgNCBBAgdSIDQgQQIHUiA0IEECB1IoFLotq+3/brt12w/ZLvMLy0BFNEydNsLJF0jaSAiTpbUJeni0osBqE/Vh+7dkg603S2pX9L75VYCULeWoUfEZkm3S9ooaYukTyPiqdKLAahPlYfu8yRdJGmxpPmSZtu+bJrrrbQ9aHtwJHbWvymA76zKQ/dzJL0TEcMRMSrpUUln7nmliFgVEQMRMdDLa3VAR6kS+kZJy2z327akFZKGyq4FoE5VnqOvlbRG0jpJr079N6sK7wWgRpU+EBsRt0q6tfAuAArhnXFAAoQOJEDoQAKEDiRA6EAChA4kUOR8sxNz+/TFWSfWPrf/r6/UPn
OXz887pcjco+59s8hcSZo46ogic2Ow/lN1S5IPO6zIXEl674rjisydf1u50z3vTxzRgQQIHUiA0IEECB1IgNCBBAgdSIDQgQQIHUiA0IEECB1IgNCBBAgdSIDQgQQIHUiA0IEECB1IgNCBBAgdSIDQgQQIHUiA0IEEHBH1D7WHJf234tUPlbS19iXKmWn7Suy8P3TKvkdFxF6n2y0SejtsD0bEQKNLtGGm7Sux8/7Q6fvy0B1IgNCBBDoh9FVNL9CmmbavxM77Q0fv2/hzdADldcIRHUBhhA4kQOhAAoQOJEDoQAL/B7vHwRJUyF4gAAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 288x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"fit_predict_print_unsupervised(kmeans_fit_predict,X,X,y)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import RandomizedSearchCV
import numpy as np
# def rf_fit_predict(X_train, y_train, X_test):
# classifier = RandomForestClassifier(n_estimators= 10, criterion="entropy", random_state=0)
# classifier.fit(X_train,y_train)
# print(classifier.get_params())
# return classifier.predict(X_test)
def rf_fit_predict(X_train, y_train, X_test):
    """Train a fixed-configuration random forest and predict ``X_test``.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels.
        X_test: Feature matrix to predict.

    Returns:
        The labels predicted by the fitted forest for ``X_test``.
    """
    forest_params = {
        "max_samples": 0.95,
        "n_estimators": 3000,
        "bootstrap": True,
        "min_samples_split": 2,
        "min_samples_leaf": 1,
        "criterion": "entropy",
        "random_state": 0,
    }
    forest = RandomForestClassifier(**forest_params)
    forest.fit(X_train, y_train)
    predictions = forest.predict(X_test)
    return predictions
def randomized_search_fold_size_rf_fit_predict(X_train, y_train, X_test):
    """Search over the forest's ``max_samples`` setting with 5-fold CV.

    Tries every ``max_samples`` value in the candidate list on an otherwise
    fixed ``RandomForestClassifier`` and prints the best parameters found.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels.
        X_test: Feature matrix to predict with the fitted search object.

    Returns:
        A ``(search, predictions)`` tuple: the fitted ``RandomizedSearchCV``
        and its predictions for ``X_test``.
    """
    max_samples = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7]
    random_grid = {'max_samples': max_samples}
    classifier = RandomForestClassifier(n_estimators=5000, bootstrap=True, max_depth=40, min_samples_split=2, min_samples_leaf=1, criterion="entropy", random_state=0)
    # The grid holds only len(max_samples) distinct candidates. Requesting
    # more iterations than that (previously 100) makes scikit-learn emit a
    # warning and clamp to the grid size, so ask for exactly that many.
    rf_random = RandomizedSearchCV(
        estimator=classifier,
        param_distributions=random_grid,
        n_iter=len(max_samples),
        cv=5,
        verbose=2,
        random_state=42,
        n_jobs=-1,
    )
    rf_random.fit(X_train, y_train)
    print(rf_random.best_params_)
    return rf_random, rf_random.predict(X_test)
def randomized_search_cv_rf_fit_predict(X_train, y_train, X_test):
    """Run a randomized hyperparameter search for a random forest.

    Samples 100 candidates from the grid built below, scores each with
    3-fold cross validation, and predicts ``X_test`` with the fitted search.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels.
        X_test: Feature matrix to predict with the fitted search object.

    Returns:
        A ``(search, predictions)`` tuple: the fitted ``RandomizedSearchCV``
        (callers read ``best_params_`` from it) and its predictions for
        ``X_test``.
    """
    n_estimators = [int(x) for x in np.linspace(start=200, stop=2000, num=10)]
    max_depth = [int(x) for x in np.linspace(10, 110, num=11)]
    max_depth.append(None)  # None lets trees grow without a depth limit
    # 'auto' was only an alias of 'sqrt' for classifiers (a duplicate grid
    # entry) and newer scikit-learn releases reject it, so search over two
    # genuinely different settings instead.
    max_features = ['sqrt', 'log2']
    min_samples_split = [2, 5, 10]
    min_samples_leaf = [1, 2, 4]
    bootstrap = [True, False]
    random_grid = {
        'n_estimators': n_estimators,
        'max_features': max_features,
        'max_depth': max_depth,
        'min_samples_split': min_samples_split,
        'min_samples_leaf': min_samples_leaf,
        'bootstrap': bootstrap,
    }
    # random_state=0 matches the other forest helpers in this module and
    # makes the search result reproducible from run to run.
    classifier = RandomForestClassifier(criterion="entropy", random_state=0)
    rf_random = RandomizedSearchCV(
        estimator=classifier,
        param_distributions=random_grid,
        n_iter=100,
        cv=3,
        verbose=2,
        random_state=42,
        n_jobs=-1,
    )
    rf_random.fit(X_train, y_train)
    return rf_random, rf_random.predict(X_test)
\ No newline at end of file
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
def load_scale_xy_with_25p_split():
    """Load the feature CSV, split it 75/25 and standardize the features.

    Reads ``data/features_30_sec.csv``, takes columns 1..58 as features and
    column 59 as the label, holds out 25% of the rows as a test set, and
    standardizes both feature sets with a scaler fitted on the training
    rows only.

    Returns:
        A ``(X_train, X_test, y_train, y_test)`` tuple.
    """
    # Load the dataset and separate features from the label column.
    dataset = pd.read_csv("data/features_30_sec.csv")
    X = dataset.iloc[:, 1:59].values
    y = dataset.iloc[:, 59].values
    # Split X and y in a single call so both are guaranteed to use the same
    # row permutation, rather than relying on two calls sharing a seed.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=0
    )
    # Feature scaling: fit on the training rows, reuse for the test rows.
    sc_X = StandardScaler()
    X_train = sc_X.fit_transform(X_train)
    X_test = sc_X.transform(X_test)
    return X_train, X_test, y_train, y_test
def load_scale_x_encode_y():
    """Load the feature CSV, standardize features and integer-encode labels.

    Reads ``data/features_30_sec.csv``; columns 1..58 are the features and
    column 59 is the label. No train/test split is performed.

    Returns:
        An ``(X, y)`` tuple: the standardized feature matrix and the labels
        as produced by ``LabelEncoder.fit_transform``.
    """
    frame = pd.read_csv("data/features_30_sec.csv")
    raw_features = frame.iloc[:, 1:59].values
    raw_labels = frame.iloc[:, 59].values
    scaled_features = StandardScaler().fit_transform(raw_features)
    encoded_labels = LabelEncoder().fit_transform(raw_labels)
    return scaled_features, encoded_labels
def get_accuracy(cm):
    """Return the accuracy, in percent, implied by a confusion matrix.

    Accuracy is the sum of the diagonal divided by the sum of all entries,
    multiplied by 100.

    Args:
        cm: Square confusion matrix, as a NumPy array or any nested
            sequence that ``np.asarray`` can convert.

    Returns:
        The percentage of entries that lie on the diagonal.
    """
    cm = np.asarray(cm)
    # np.trace sums the diagonal without a manual loop (and without
    # shadowing the builtin ``sum``).
    return 100 * (np.trace(cm) / np.sum(cm))
def fit_predict_print(fit_predict_function, X_train, y_train, X_test, y_test):
    """Fit a supervised model, then print its accuracy and plot its confusion matrix.

    Args:
        fit_predict_function: Callable invoked as
            ``fit_predict_function(X_train, y_train, X_test)``; its return
            value is used as the predicted labels.
        X_train: Training feature matrix.
        y_train: Training labels.
        X_test: Test feature matrix.
        y_test: True labels for ``X_test``.
    """
    predictions = fit_predict_function(X_train, y_train, X_test)
    confusion = confusion_matrix(y_test, predictions)
    accuracy = get_accuracy(confusion)
    print("Accuracy: ", accuracy)
    # Show the confusion matrix as an image.
    plt.matshow(confusion)
    plt.show()
def fit_predict_print_unsupervised(fit_predict_function, X_train, X_test, y_test):
    """Fit a clustering model and, if labels are given, report how well clusters match them.

    Args:
        fit_predict_function: Callable invoked as
            ``fit_predict_function(X_train, X_test)``; its return value is
            used as the cluster id of each test row.
        X_train: Feature matrix used to fit the clustering.
        X_test: Feature matrix to assign to clusters.
        y_test: True labels for ``X_test``, or ``None`` to skip the report.
    """
    # Local import: scipy is not imported at the top of this module.
    from scipy.optimize import linear_sum_assignment

    y_pred = fit_predict_function(X_train, X_test)
    if y_test is None:
        return
    cm = confusion_matrix(y_test, y_pred)
    # Cluster ids are arbitrary, so cluster i need not correspond to class i
    # and the raw diagonal is meaningless. Pick the one-to-one
    # class -> cluster matching that maximizes the number of agreeing
    # samples (Hungarian algorithm; negate because it minimizes cost) and
    # reorder the columns so that matching lands on the diagonal.
    _, matched_cluster = linear_sum_assignment(-cm)
    cm = cm[:, matched_cluster]
    print("Accuracy: ", get_accuracy(cm))
    plt.matshow(cm)
    plt.show()
def search_fit_predict_print(fit_predict_function, X_train, y_train, X_test, y_test):
    """Run a hyperparameter search, then print accuracy and best parameters and plot the confusion matrix.

    Args:
        fit_predict_function: Callable invoked as
            ``fit_predict_function(X_train, y_train, X_test)`` that returns
            a ``(search, predictions)`` pair; ``search`` must expose
            ``best_params_``.
        X_train: Training feature matrix.
        y_train: Training labels.
        X_test: Test feature matrix.
        y_test: True labels for ``X_test``.
    """
    search, predictions = fit_predict_function(X_train, y_train, X_test)
    confusion = confusion_matrix(y_test, predictions)
    accuracy = get_accuracy(confusion)
    print("Accuracy: ", accuracy)
    best_params = search.best_params_
    print("Hyperparamters: ", best_params)
    # Show the confusion matrix as an image.
    plt.matshow(confusion)
    plt.show()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment