Upload New File

081ff745 · Anurag Kumar · db251d56 · 081ff745
Commit 081ff745 authored Nov 29, 2021 by Anurag Kumar
Show whitespace changes
Inline Side-by-side

Showing with 219 additions and 0 deletions

ML_Models/Linear_Regression.ipynb ML_Models/Linear_Regression.ipynb +219 -0

No files found.
--- a/ML_Models/Linear_Regression.ipynb
+++ b/ML_Models/Linear_Regression.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Linear Regression"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn import preprocessing\n",
+    "from sklearn.preprocessing import StandardScaler"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(3964, 60)"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Read the CSV into a DataFrame; the trailing expression displays (rows, columns).\n",
+    "csv_path = \"dataset/dev.csv\"\n",
+    "df = pd.read_csv(csv_path)\n",
+    "df.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index([' timedelta', ' n_tokens_title', ' n_tokens_content',\n",
+       "       ' n_unique_tokens', ' n_non_stop_words', ' n_non_stop_unique_tokens',\n",
+       "       ' num_hrefs', ' num_self_hrefs', ' num_imgs', ' num_videos',\n",
+       "       ' average_token_length', ' num_keywords', ' data_channel_is_lifestyle',\n",
+       "       ' data_channel_is_entertainment', ' data_channel_is_bus',\n",
+       "       ' data_channel_is_socmed', ' data_channel_is_tech',\n",
+       "       ' data_channel_is_world', ' kw_min_min', ' kw_max_min', ' kw_avg_min',\n",
+       "       ' kw_min_max', ' kw_max_max', ' kw_avg_max', ' kw_min_avg',\n",
+       "       ' kw_max_avg', ' kw_avg_avg', ' self_reference_min_shares',\n",
+       "       ' self_reference_max_shares', ' self_reference_avg_sharess',\n",
+       "       ' weekday_is_monday', ' weekday_is_tuesday', ' weekday_is_wednesday',\n",
+       "       ' weekday_is_thursday', ' weekday_is_friday', ' weekday_is_saturday',\n",
+       "       ' weekday_is_sunday', ' is_weekend', ' LDA_00', ' LDA_01', ' LDA_02',\n",
+       "       ' LDA_03', ' LDA_04', ' global_subjectivity',\n",
+       "       ' global_sentiment_polarity', ' global_rate_positive_words',\n",
+       "       ' global_rate_negative_words', ' rate_positive_words',\n",
+       "       ' rate_negative_words', ' avg_positive_polarity',\n",
+       "       ' min_positive_polarity', ' max_positive_polarity',\n",
+       "       ' avg_negative_polarity', ' min_negative_polarity',\n",
+       "       ' max_negative_polarity', ' title_subjectivity',\n",
+       "       ' title_sentiment_polarity', ' abs_title_subjectivity',\n",
+       "       ' abs_title_sentiment_polarity', ' shares'],\n",
+       "      dtype='object')"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 78,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Build the feature matrix by dropping the target column by name rather than by\n",
+    "# position, so it stays consistent with the name-based target selection and cannot\n",
+    "# leak ' shares' into the features if the column order ever changes.\n",
+    "input_data = df.drop(columns=[' shares'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 79,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(3964, 1)"
+      ]
+     },
+     "execution_count": 79,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Selecting with a one-element column list keeps the data 2-D, giving an (n_rows, 1) array.\n",
+    "target_col = df[[' shares']].to_numpy()\n",
+    "target_col.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 80,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Hold out 30% of the rows for evaluation. A fixed random_state makes the split,\n",
+    "# and therefore the reported MSE, reproducible on a fresh kernel.\n",
+    "x_train, x_test, y_train, y_test = train_test_split(\n",
+    "    input_data, target_col, test_size=0.3, random_state=42\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 81,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# scaler = StandardScaler()\n",
+    "\n",
+    "# scaler.fit(x_train)\n",
+    "\n",
+    "# x_train = scaler.transform(x_train)\n",
+    "# x_test = scaler.transform(x_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 82,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.linear_model import LinearRegression"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 83,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# fit() returns the estimator itself, so construction and training can be chained.\n",
+    "lr = LinearRegression().fit(x_train, y_train)\n",
+    "# Predict on the held-out rows.\n",
+    "y_pred = lr.predict(x_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 84,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.metrics import mean_squared_error\n",
+    "# scikit-learn metrics take (y_true, y_pred): ground truth first, predictions second.\n",
+    "# Plain MSE is symmetric, but keeping the documented order avoids silent errors if\n",
+    "# this call is later swapped for an asymmetric metric.\n",
+    "mse_loss = mean_squared_error(y_test, y_pred)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 85,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.08413953535946381"
+      ]
+     },
+     "execution_count": 85,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "mse_loss"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}