Upload New File

ccce059a · Anurag Kumar · 081ff745 · ccce059a
Commit ccce059a authored Nov 29, 2021 by Anurag Kumar
Show whitespace changes
Inline Side-by-side

Showing with 417 additions and 0 deletions

ML_Models/Logistic_Regression.ipynb ML_Models/Logistic_Regression.ipynb +417 -0

No files found.
--- a/ML_Models/Logistic_Regression.ipynb
+++ b/ML_Models/Logistic_Regression.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Logistic Regression"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load the breast-cancer CSV into a DataFrame. The path is relative, so it is resolved\n",
+    "# against the kernel's current working directory.\n",
+    "df = pd.read_csv(\"dataset/breast_cancer.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>id</th>\n",
+       "      <th>diagnosis</th>\n",
+       "      <th>radius_mean</th>\n",
+       "      <th>texture_mean</th>\n",
+       "      <th>perimeter_mean</th>\n",
+       "      <th>area_mean</th>\n",
+       "      <th>smoothness_mean</th>\n",
+       "      <th>compactness_mean</th>\n",
+       "      <th>concavity_mean</th>\n",
+       "      <th>concave points_mean</th>\n",
+       "      <th>...</th>\n",
+       "      <th>texture_worst</th>\n",
+       "      <th>perimeter_worst</th>\n",
+       "      <th>area_worst</th>\n",
+       "      <th>smoothness_worst</th>\n",
+       "      <th>compactness_worst</th>\n",
+       "      <th>concavity_worst</th>\n",
+       "      <th>concave points_worst</th>\n",
+       "      <th>symmetry_worst</th>\n",
+       "      <th>fractal_dimension_worst</th>\n",
+       "      <th>Unnamed: 32</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>842302</td>\n",
+       "      <td>M</td>\n",
+       "      <td>17.99</td>\n",
+       "      <td>10.38</td>\n",
+       "      <td>122.80</td>\n",
+       "      <td>1001.0</td>\n",
+       "      <td>0.11840</td>\n",
+       "      <td>0.27760</td>\n",
+       "      <td>0.3001</td>\n",
+       "      <td>0.14710</td>\n",
+       "      <td>...</td>\n",
+       "      <td>17.33</td>\n",
+       "      <td>184.60</td>\n",
+       "      <td>2019.0</td>\n",
+       "      <td>0.1622</td>\n",
+       "      <td>0.6656</td>\n",
+       "      <td>0.7119</td>\n",
+       "      <td>0.2654</td>\n",
+       "      <td>0.4601</td>\n",
+       "      <td>0.11890</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>842517</td>\n",
+       "      <td>M</td>\n",
+       "      <td>20.57</td>\n",
+       "      <td>17.77</td>\n",
+       "      <td>132.90</td>\n",
+       "      <td>1326.0</td>\n",
+       "      <td>0.08474</td>\n",
+       "      <td>0.07864</td>\n",
+       "      <td>0.0869</td>\n",
+       "      <td>0.07017</td>\n",
+       "      <td>...</td>\n",
+       "      <td>23.41</td>\n",
+       "      <td>158.80</td>\n",
+       "      <td>1956.0</td>\n",
+       "      <td>0.1238</td>\n",
+       "      <td>0.1866</td>\n",
+       "      <td>0.2416</td>\n",
+       "      <td>0.1860</td>\n",
+       "      <td>0.2750</td>\n",
+       "      <td>0.08902</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>84300903</td>\n",
+       "      <td>M</td>\n",
+       "      <td>19.69</td>\n",
+       "      <td>21.25</td>\n",
+       "      <td>130.00</td>\n",
+       "      <td>1203.0</td>\n",
+       "      <td>0.10960</td>\n",
+       "      <td>0.15990</td>\n",
+       "      <td>0.1974</td>\n",
+       "      <td>0.12790</td>\n",
+       "      <td>...</td>\n",
+       "      <td>25.53</td>\n",
+       "      <td>152.50</td>\n",
+       "      <td>1709.0</td>\n",
+       "      <td>0.1444</td>\n",
+       "      <td>0.4245</td>\n",
+       "      <td>0.4504</td>\n",
+       "      <td>0.2430</td>\n",
+       "      <td>0.3613</td>\n",
+       "      <td>0.08758</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>84348301</td>\n",
+       "      <td>M</td>\n",
+       "      <td>11.42</td>\n",
+       "      <td>20.38</td>\n",
+       "      <td>77.58</td>\n",
+       "      <td>386.1</td>\n",
+       "      <td>0.14250</td>\n",
+       "      <td>0.28390</td>\n",
+       "      <td>0.2414</td>\n",
+       "      <td>0.10520</td>\n",
+       "      <td>...</td>\n",
+       "      <td>26.50</td>\n",
+       "      <td>98.87</td>\n",
+       "      <td>567.7</td>\n",
+       "      <td>0.2098</td>\n",
+       "      <td>0.8663</td>\n",
+       "      <td>0.6869</td>\n",
+       "      <td>0.2575</td>\n",
+       "      <td>0.6638</td>\n",
+       "      <td>0.17300</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>84358402</td>\n",
+       "      <td>M</td>\n",
+       "      <td>20.29</td>\n",
+       "      <td>14.34</td>\n",
+       "      <td>135.10</td>\n",
+       "      <td>1297.0</td>\n",
+       "      <td>0.10030</td>\n",
+       "      <td>0.13280</td>\n",
+       "      <td>0.1980</td>\n",
+       "      <td>0.10430</td>\n",
+       "      <td>...</td>\n",
+       "      <td>16.67</td>\n",
+       "      <td>152.20</td>\n",
+       "      <td>1575.0</td>\n",
+       "      <td>0.1374</td>\n",
+       "      <td>0.2050</td>\n",
+       "      <td>0.4000</td>\n",
+       "      <td>0.1625</td>\n",
+       "      <td>0.2364</td>\n",
+       "      <td>0.07678</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 33 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "         id diagnosis  radius_mean  texture_mean  perimeter_mean  area_mean  \\\n",
+       "0    842302         M        17.99         10.38          122.80     1001.0   \n",
+       "1    842517         M        20.57         17.77          132.90     1326.0   \n",
+       "2  84300903         M        19.69         21.25          130.00     1203.0   \n",
+       "3  84348301         M        11.42         20.38           77.58      386.1   \n",
+       "4  84358402         M        20.29         14.34          135.10     1297.0   \n",
+       "\n",
+       "   smoothness_mean  compactness_mean  concavity_mean  concave points_mean  \\\n",
+       "0          0.11840           0.27760          0.3001              0.14710   \n",
+       "1          0.08474           0.07864          0.0869              0.07017   \n",
+       "2          0.10960           0.15990          0.1974              0.12790   \n",
+       "3          0.14250           0.28390          0.2414              0.10520   \n",
+       "4          0.10030           0.13280          0.1980              0.10430   \n",
+       "\n",
+       "      ...       texture_worst  perimeter_worst  area_worst  smoothness_worst  \\\n",
+       "0     ...               17.33           184.60      2019.0            0.1622   \n",
+       "1     ...               23.41           158.80      1956.0            0.1238   \n",
+       "2     ...               25.53           152.50      1709.0            0.1444   \n",
+       "3     ...               26.50            98.87       567.7            0.2098   \n",
+       "4     ...               16.67           152.20      1575.0            0.1374   \n",
+       "\n",
+       "   compactness_worst  concavity_worst  concave points_worst  symmetry_worst  \\\n",
+       "0             0.6656           0.7119                0.2654          0.4601   \n",
+       "1             0.1866           0.2416                0.1860          0.2750   \n",
+       "2             0.4245           0.4504                0.2430          0.3613   \n",
+       "3             0.8663           0.6869                0.2575          0.6638   \n",
+       "4             0.2050           0.4000                0.1625          0.2364   \n",
+       "\n",
+       "   fractal_dimension_worst  Unnamed: 32  \n",
+       "0                  0.11890          NaN  \n",
+       "1                  0.08902          NaN  \n",
+       "2                  0.08758          NaN  \n",
+       "3                  0.17300          NaN  \n",
+       "4                  0.07678          NaN  \n",
+       "\n",
+       "[5 rows x 33 columns]"
+      ]
+     },
+     "execution_count": 24,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Preview the first rows of the loaded frame.\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['id', 'diagnosis', 'radius_mean', 'texture_mean', 'perimeter_mean',\n",
+       "       'area_mean', 'smoothness_mean', 'compactness_mean', 'concavity_mean',\n",
+       "       'concave points_mean', 'symmetry_mean', 'fractal_dimension_mean',\n",
+       "       'radius_se', 'texture_se', 'perimeter_se', 'area_se', 'smoothness_se',\n",
+       "       'compactness_se', 'concavity_se', 'concave points_se', 'symmetry_se',\n",
+       "       'fractal_dimension_se', 'radius_worst', 'texture_worst',\n",
+       "       'perimeter_worst', 'area_worst', 'smoothness_worst',\n",
+       "       'compactness_worst', 'concavity_worst', 'concave points_worst',\n",
+       "       'symmetry_worst', 'fractal_dimension_worst', 'Unnamed: 32'],\n",
+       "      dtype='object')"
+      ]
+     },
+     "execution_count": 25,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# List every column name (the preview above elides the middle columns with '...').\n",
+    "df.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Encode the text diagnosis labels as numbers.\n",
+    "from sklearn import preprocessing\n",
+    "Label = preprocessing.LabelEncoder()\n",
+    "\n",
+    "diagnosis = Label.fit_transform(df['diagnosis'])\n",
+    "# NOTE(review): `diagnosis` is not read by any later cell -- the split cell takes\n",
+    "# y_train/y_test from the raw 'diagnosis' column, so the model is trained on the text labels."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Hold out a test split (seeded via random_state), then separate features from the target.\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "\n",
+    "# NOTE(review): columns[2:30] covers radius_mean .. concave points_worst, i.e. 28 columns;\n",
+    "# symmetry_worst and fractal_dimension_worst are left out -- confirm this is intended.\n",
+    "feature_cols = df.columns[2:30]\n",
+    "train, test = train_test_split(df, random_state=40)\n",
+    "\n",
+    "x_train, y_train = train[feature_cols], train['diagnosis']\n",
+    "x_test, y_test = test[feature_cols], test['diagnosis']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Scale the features. The scaler is fitted on the training split only and then applied\n",
+    "# to both splits, so no test-set statistics feed into the scaling.\n",
+    "from sklearn.preprocessing import StandardScaler\n",
+    "\n",
+    "scaler = StandardScaler().fit(x_train)\n",
+    "x_train, x_test = scaler.transform(x_train), scaler.transform(x_test)\n",
+    "# NOTE: x_train/x_test are rebound to the scaled arrays, so re-running this cell on its own\n",
+    "# would fit the scaler on already-scaled data; re-run the split cell first."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Fit a logistic-regression classifier (default settings) on the scaled training data,\n",
+    "# then predict labels for the held-out test rows.\n",
+    "from sklearn.linear_model import LogisticRegression\n",
+    "\n",
+    "lr = LogisticRegression().fit(x_train, y_train)\n",
+    "y_pred = lr.predict(x_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[96  2]\n",
+      " [ 2 43]]\n",
+      "             precision    recall  f1-score   support\n",
+      "\n",
+      "          B       0.98      0.98      0.98        98\n",
+      "          M       0.96      0.96      0.96        45\n",
+      "\n",
+      "avg / total       0.97      0.97      0.97       143\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Evaluate on the test split: print the confusion matrix, then the per-class report.\n",
+    "from sklearn.metrics import confusion_matrix, classification_report\n",
+    "\n",
+    "for summary in (confusion_matrix(y_test, y_pred), classification_report(y_test, y_pred)):\n",
+    "    print(summary)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array(['M'], dtype=object)"
+      ]
+     },
+     "execution_count": 33,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Predict the diagnosis for one particular sample: the first row of df, restricted to the\n",
+    "# same 2:30 column slice used for training and passed through the already-fitted scaler.\n",
+    "first_row = df.iloc[:1, 2:30]\n",
+    "x_test1 = scaler.transform(first_row)\n",
+    "pred = lr.predict(x_test1)\n",
+    "pred"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}