Commit 390923f2 authored by Nilesh Jagdish's avatar Nilesh Jagdish

Added RFRegressor

parent b411c694
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "uxa8uaiWhPuw"
},
"source": [
"# Read data into a dataframe"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "pYYleLjJhr_o"
},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import OneHotEncoder \n",
"import numpy as np\n",
"from sklearn.ensemble import RandomForestRegressor \n",
"from matplotlib import pyplot as plt "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "RP4jhT3Chr_u"
},
"outputs": [],
"source": [
"# !unzip 'final_dataset_2.zip'\n",
"df = pd.read_csv(\"final_dataset_2.csv\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "80n8bJeDhPuy"
},
"source": [
"# Data cleaning"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 338
},
"id": "Cgw2yRgU5zBZ",
"outputId": "e87c79d9-bf2c-492e-c9db-a865f4baaab9"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Unnamed: 0</th>\n",
" <th>Unnamed: 0.1</th>\n",
" <th>vendor_id</th>\n",
" <th>passenger_count</th>\n",
" <th>pickup_longitude</th>\n",
" <th>pickup_latitude</th>\n",
" <th>dropoff_longitude</th>\n",
" <th>dropoff_latitude</th>\n",
" <th>trip_duration</th>\n",
" <th>visi</th>\n",
" <th>vism</th>\n",
" <th>fog</th>\n",
" <th>rain</th>\n",
" <th>snow</th>\n",
" <th>hail</th>\n",
" <th>thunder</th>\n",
" <th>tornado</th>\n",
" <th>holiday_or_not</th>\n",
" <th>turns</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1.458644e+06</td>\n",
" <td>1.458644e+06</td>\n",
" <td>1.458644e+06</td>\n",
" <td>1.458644e+06</td>\n",
" <td>1.458644e+06</td>\n",
" <td>1.458644e+06</td>\n",
" <td>1.458644e+06</td>\n",
" <td>1.458644e+06</td>\n",
" <td>1.458644e+06</td>\n",
" <td>1.406904e+06</td>\n",
" <td>1.406904e+06</td>\n",
" <td>1.458644e+06</td>\n",
" <td>1.458644e+06</td>\n",
" <td>1.458644e+06</td>\n",
" <td>1458644.0</td>\n",
" <td>1458644.0</td>\n",
" <td>1458644.0</td>\n",
" <td>1.458644e+06</td>\n",
" <td>1.458643e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>7.293215e+05</td>\n",
" <td>7.293215e+05</td>\n",
" <td>1.534950e+00</td>\n",
" <td>1.664530e+00</td>\n",
" <td>-7.397349e+01</td>\n",
" <td>4.075092e+01</td>\n",
" <td>-7.397342e+01</td>\n",
" <td>4.075180e+01</td>\n",
" <td>9.594923e+02</td>\n",
" <td>9.083394e+00</td>\n",
" <td>1.462562e+01</td>\n",
" <td>6.512898e-03</td>\n",
" <td>9.604811e-02</td>\n",
" <td>2.387149e-02</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.868242e-02</td>\n",
" <td>7.547126e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>4.210744e+05</td>\n",
" <td>4.210744e+05</td>\n",
" <td>4.987772e-01</td>\n",
" <td>1.314242e+00</td>\n",
" <td>7.090186e-02</td>\n",
" <td>3.288119e-02</td>\n",
" <td>7.064327e-02</td>\n",
" <td>3.589056e-02</td>\n",
" <td>5.237432e+03</td>\n",
" <td>1.931623e+00</td>\n",
" <td>3.114066e+00</td>\n",
" <td>1.123877e-01</td>\n",
" <td>5.186301e-01</td>\n",
" <td>2.773470e-01</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.354009e-01</td>\n",
" <td>4.432504e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>1.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>-1.219333e+02</td>\n",
" <td>3.435970e+01</td>\n",
" <td>-1.219333e+02</td>\n",
" <td>3.218114e+01</td>\n",
" <td>1.000000e+00</td>\n",
" <td>2.000000e-01</td>\n",
" <td>4.000000e-01</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.000000e+00</td>\n",
" <td>2.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>3.646608e+05</td>\n",
" <td>3.646608e+05</td>\n",
" <td>1.000000e+00</td>\n",
" <td>1.000000e+00</td>\n",
" <td>-7.399187e+01</td>\n",
" <td>4.073735e+01</td>\n",
" <td>-7.399133e+01</td>\n",
" <td>4.073588e+01</td>\n",
" <td>3.970000e+02</td>\n",
" <td>9.000000e+00</td>\n",
" <td>1.450000e+01</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.000000e+00</td>\n",
" <td>5.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>7.293215e+05</td>\n",
" <td>7.293215e+05</td>\n",
" <td>2.000000e+00</td>\n",
" <td>1.000000e+00</td>\n",
" <td>-7.398174e+01</td>\n",
" <td>4.075410e+01</td>\n",
" <td>-7.397975e+01</td>\n",
" <td>4.075452e+01</td>\n",
" <td>6.620000e+02</td>\n",
" <td>1.000000e+01</td>\n",
" <td>1.610000e+01</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.000000e+00</td>\n",
" <td>6.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>1.093982e+06</td>\n",
" <td>1.093982e+06</td>\n",
" <td>2.000000e+00</td>\n",
" <td>2.000000e+00</td>\n",
" <td>-7.396733e+01</td>\n",
" <td>4.076836e+01</td>\n",
" <td>-7.396301e+01</td>\n",
" <td>4.076981e+01</td>\n",
" <td>1.075000e+03</td>\n",
" <td>1.000000e+01</td>\n",
" <td>1.610000e+01</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.000000e+00</td>\n",
" <td>9.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>1.458643e+06</td>\n",
" <td>1.458643e+06</td>\n",
" <td>2.000000e+00</td>\n",
" <td>9.000000e+00</td>\n",
" <td>-6.133553e+01</td>\n",
" <td>5.188108e+01</td>\n",
" <td>-6.133553e+01</td>\n",
" <td>4.392103e+01</td>\n",
" <td>3.526282e+06</td>\n",
" <td>1.000000e+01</td>\n",
" <td>1.610000e+01</td>\n",
" <td>4.000000e+00</td>\n",
" <td>7.000000e+00</td>\n",
" <td>6.000000e+00</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.000000e+00</td>\n",
" <td>4.600000e+01</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Unnamed: 0 Unnamed: 0.1 ... holiday_or_not turns\n",
"count 1.458644e+06 1.458644e+06 ... 1.458644e+06 1.458643e+06\n",
"mean 7.293215e+05 7.293215e+05 ... 1.868242e-02 7.547126e+00\n",
"std 4.210744e+05 4.210744e+05 ... 1.354009e-01 4.432504e+00\n",
"min 0.000000e+00 0.000000e+00 ... 0.000000e+00 2.000000e+00\n",
"25% 3.646608e+05 3.646608e+05 ... 0.000000e+00 5.000000e+00\n",
"50% 7.293215e+05 7.293215e+05 ... 0.000000e+00 6.000000e+00\n",
"75% 1.093982e+06 1.093982e+06 ... 0.000000e+00 9.000000e+00\n",
"max 1.458643e+06 1.458643e+06 ... 1.000000e+00 4.600000e+01\n",
"\n",
"[8 rows x 19 columns]"
]
},
"execution_count": 13,
"metadata": {
"tags": []
},
"output_type": "execute_result"
}
],
"source": [
"df.describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "2z07A2gvqUeI"
},
"outputs": [],
"source": [
"# Drop both 'Unnamed' columns; their describe() stats (min 0, max = row count - 1) suggest leftover row indices\n",
"df = df.drop(columns=['Unnamed: 0', 'Unnamed: 0.1'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "xy5yhwjG58iQ"
},
"outputs": [],
"source": [
"# hail / thunder / tornado are all zero in describe(); 'visi' is dropped alongside them while 'vism' is kept\n",
"df = df.drop(columns=['hail', 'thunder', 'tornado', 'visi'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "WoLvPXIXIrTl",
"outputId": "774745c8-84e2-4641-9530-68093e69700a"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"id 0\n",
"vendor_id 0\n",
"pickup_datetime 0\n",
"dropoff_datetime 0\n",
"passenger_count 0\n",
"pickup_longitude 0\n",
"pickup_latitude 0\n",
"dropoff_longitude 0\n",
"dropoff_latitude 0\n",
"store_and_fwd_flag 0\n",
"trip_duration 0\n",
"vism 51740\n",
"fog 0\n",
"rain 0\n",
"snow 0\n",
"holiday_or_not 0\n",
"turns 1\n",
"dtype: int64\n"
]
}
],
"source": [
"missing_val_count_by_column = (df.isnull().sum())\n",
"print(missing_val_count_by_column)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "wF7xm26tJlh0",
"outputId": "2046f273-d858-471f-d500-25a51ec48e0f"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"id 0\n",
"vendor_id 0\n",
"pickup_datetime 0\n",
"dropoff_datetime 0\n",
"passenger_count 0\n",
"pickup_longitude 0\n",
"pickup_latitude 0\n",
"dropoff_longitude 0\n",
"dropoff_latitude 0\n",
"store_and_fwd_flag 0\n",
"trip_duration 0\n",
"vism 0\n",
"fog 0\n",
"rain 0\n",
"snow 0\n",
"holiday_or_not 0\n",
"turns 0\n",
"dtype: int64\n"
]
}
],
"source": [
"# Fill gaps: 'vism' gets 16.1 (its median and max in describe()), 'turns' gets its rounded mean\n",
"values = {\n",
"    'vism': 16.1,\n",
"    'turns': np.round(df['turns'].mean()),\n",
"}\n",
"df = df.fillna(value=values)\n",
"\n",
"# Re-check: every column should now report zero missing values\n",
"missing_val_count_by_column = df.isnull().sum()\n",
"print(missing_val_count_by_column)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ymGBKkTlhPu1"
},
"source": [
"The maximum trip duration is 3526282 seconds (~41 days), which is impossible for a taxi trip.\n",
"\n",
"Clearly, there are some outliers in the data."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "nahwlpHAKmMD"
},
"source": [
"# Outlier detection and removal"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 415
},
"id": "uTuo_KlphPu1",
"outputId": "ea1fb8e4-c389-4047-e1e4-8e1802ba0884"
},
"outputs": [
{
"data": {
"text/plain": [
"{'boxes': [<matplotlib.lines.Line2D at 0x7f62e29e9cf8>],\n",
" 'caps': [<matplotlib.lines.Line2D at 0x7f62e29cb748>,\n",
" <matplotlib.lines.Line2D at 0x7f62e29cbac8>],\n",
" 'fliers': [<matplotlib.lines.Line2D at 0x7f62e29b6208>],\n",
" 'means': [],\n",
" 'medians': [<matplotlib.lines.Line2D at 0x7f62e29cbe48>],\n",
" 'whiskers': [<matplotlib.lines.Line2D at 0x7f62e29cb048>,\n",
" <matplotlib.lines.Line2D at 0x7f62e29cb3c8>]}"
]
},
"execution_count": 18,
"metadata": {
"tags": []
},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAEDCAYAAAAlRP8qAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAP8ElEQVR4nO3df4ylVX3H8ffHYWRNbSVxp5HA4pqKdnBa/DGhWDcNizEBY+APsWXTqDQjW62gJraJ6SSoJPOHSaOJYDXbDhGMHbVozVaghsRJcKogAwEKjDYbGsMSEkZAkCiwS779Yy44jDN77+ze2TtzeL+SG57nnLPP8/1j8+HZ85xzb6oKSdLW97JBFyBJ6g8DXZIaYaBLUiMMdElqhIEuSY0w0CWpEQMN9CTXJHkkyb09jv/LJPcnuS/Jv210fZK0lWSQ69CT/AXwFHBdVY11GXs68C3g3Kp6PMkfVtUjx6NOSdoKBvqEXlW3AI8tb0vyR0n+K8kdSX6Y5I87XZcCX6qqxzt/1jCXpGU24xz6PuDyqnob8PfAP3fa3wC8Icl/J7k1yXkDq1CSNqETBl3AckleCfw58O9Jnm8+sfPfE4DTgXOAU4FbkvxJVf3yeNcpSZvRpgp0lv7F8MuqevMqfQeB26rqEPB/Sf6XpYC//XgWKEmb1aaacqmqJ1kK6/cBZMmZne7vsvR0TpLtLE3BPDCIOiVpMxr0ssUZ4MfAG5McTDIB/DUwkeRu4D7gws7w7wOPJrkfmAX+oaoeHUTdkrQZDXTZoiSpfzbVlIsk6egN7KXo9u3ba+fOnYO6vSRtSXfccccvqmpktb6BBfrOnTuZn58f1O0laUtK8vO1+pxykaRGGOiS1AgDXZIaYaBLUiMMdElqhIEuLTMzM8PY2BhDQ0OMjY0xMzMz6JKknm22L+eSBmZmZobJyUmmp6fZtWsXc3NzTExMALBnz54BVyd11/UJPcm2JD9Jcnfnp98+u8qYS5IsJrmr8/nQxpQrbZypqSmmp6fZvXs3w8PD7N69m+npaaampgZdmtSTrt/lkqUvJv+9qnoqyTAwB3y8qm5dNuYSYLyqLuv1xuPj4+XGIm0mQ0NDPP300wwPD7/QdujQIbZt28Zzzz03wMqk30pyR1WNr9bX9Qm9ljzVOR3ufPxGLzVndHSUubm5F7XNzc0xOjo6oIqk9enppWiSoSR3AY8AN1fVbasMe2+Se5Jcn2THGtfZm2Q+yfzi4uIxlC313+TkJBMTE8zOznLo0CFmZ2eZmJhgcnJy0KVJPVnX1+cmOQn4D5Z+8/PeZe2vBp6qqmeS/C3wV1V17pGu5ZSLNqOZmRmmpqZYWFhgdHSUyclJX4hqUznSlMu6vw89yRXAr6vqn9boHwIeq6pXHek6Brokrd8xzaEnGek8mZPkFcC7gJ+uGHPystMLgIWjL1eSdDR6WYd+MnBt58n7ZcC3qup7Sa4E5qtqP/CxJBcAh4HHgEs2qmBJ0uoG9hN0TrlI0vod05SLJGlrMNAlqREGuiQ1wkCXpEYY6JLUCANdkhphoEtSIwx0SWqEgS5JjTDQJakRBrokNcJAl6RGGOiS1AgDXZIaYaBLUiMMdElqhIEuSY0w0CWpEQa6JDWia6An2ZbkJ0nuTnJfks+uMubEJN9MciDJbUl2bkSxkqS19fKE/gxwblWdCbwZOC/J2SvGTACPV9XrgS8An+tvmZKkbroGei15qnM63PnUimEXAtd2jq8H3pkkfatSktRVT3PoSYaS3AU8AtxcVbetGHIK8CBAVR0GngBevcp19iaZTzK/uLh4bJVLkl6kp0Cvqueq6s3AqcBZScaO5mZVta+qxqtqfGRk5GguIUlaw7pWuVTVL4FZ4LwVXQ8BOwCSnAC8Cni0HwVKknrTyyqXkSQndY5fAbwL+OmKYfuBD3aOLwJ+UFUr59klSRvohB7GnAxcm2SIpf8BfKuqvpfkSm
C+qvYD08DXkhwAHgMu3rCKJUmr6hroVXUP8JZV2q9Ydvw08L7+liZJWg93ikpSIwx0SWqEgS5JjTDQJakRBrokNcJAl6RGGOiS1AgDXZIaYaBLUiMMdElqhIEuSY0w0CWpEQa6JDXCQJekRhjoktQIA12SGmGgS1IjDHRJaoSBLkmN6BroSXYkmU1yf5L7knx8lTHnJHkiyV2dzxWrXUuStHG6/kg0cBj4ZFXdmeT3gTuS3FxV968Y98Oqek//S5Qk9aLrE3pVPVxVd3aOfwUsAKdsdGGSpPVZ1xx6kp3AW4DbVul+e5K7k9yU5E1r/Pm9SeaTzC8uLq67WEnS2noO9CSvBL4NfKKqnlzRfSfw2qo6E7gK+O5q16iqfVU1XlXjIyMjR1uzJGkVPQV6kmGWwvzrVfWdlf1V9WRVPdU5vhEYTrK9r5VKko6ol1UuAaaBhar6/BpjXtMZR5KzOtd9tJ+FSpKOrJdVLu8A3g/8T5K7Om3/CJwGUFVfAS4CPpLkMPAb4OKqqg2oV5K0hq6BXlVzQLqMuRq4ul9FSZLWz52iktQIA12SGmGgS1IjDHRJaoSBLkmNMNAlqREGurTMzMwMY2NjDA0NMTY2xszMzKBLknrWy8Yi6SVhZmaGyclJpqen2bVrF3Nzc0xMTACwZ8+eAVcndZdBbegcHx+v+fn5gdxbWs3Y2BhXXXUVu3fvfqFtdnaWyy+/nHvvvXeAlUm/leSOqhpftc9Al5YMDQ3x9NNPMzw8/ELboUOH2LZtG88999wAK5N+60iB7hy61DE6Osrc3NyL2ubm5hgdHR1QRdL6GOhSx+TkJBMTE8zOznLo0CFmZ2eZmJhgcnJy0KVJPfGlqNTx/IvPyy+/nIWFBUZHR5mamvKFqLYM59AlaQtxDl2SXgIMdElqhIEuLeNOUW1lvhSVOtwpqq3Ol6JShztFtRUc00vRJDuSzCa5P8l9ST6+ypgk+WKSA0nuSfLWfhQuHU8LCwvs2rXrRW27du1iYWFhQBVJ69PLHPph4JNVdQZwNvDRJGesGHM+cHrnsxf4cl+rlI4Dd4pqq+sa6FX1cFXd2Tn+FbAAnLJi2IXAdbXkVuCkJCf3vVppA7lTVFvdul6KJtkJvAW4bUXXKcCDy84PdtoeXvHn97L0BM9pp522vkqlDbZnzx5+9KMfcf755/PMM89w4okncumll/pCVFtGz8sWk7wS+Dbwiap68mhuVlX7qmq8qsZHRkaO5hLShpmZmeGGG27gpptu4tlnn+Wmm27ihhtucOmitoyeAj3JMEth/vWq+s4qQx4Cdiw7P7XTJm0ZU1NTTE9Ps3v3boaHh9m9ezfT09NMTU0NujSpJ72scgkwDSxU1efXGLYf+EBntcvZwBNV9fAaY6VNyVUu2up6eUJ/B/B+4Nwkd3U+707y4SQf7oy5EXgAOAD8C/B3G1OutHFc5aKtrutL0aqaA9JlTAEf7VdR0iA8v8pl5U5Rp1y0Vbj1X+rw+9C11bn1X5K2EL8PXZJeAgx0SWqEgS5JjTDQJakRBrokNcJAl6RGGOiS1AgDXZIaYaBLUiMMdElqhIEuSY0w0CWpEQa6JDXCQJekRhjoktQIA12SGmGgS1IjugZ6kmuSPJLk3jX6z0nyxLIfkL6i/2VKkrrp5TdFvwpcDVx3hDE/rKr39KUiSdJR6fqEXlW3AI8dh1okScegX3Pob09yd5KbkrxprUFJ9iaZTzK/uLjYp1tLkqA/gX4n8NqqOhO4CvjuWgOral9VjVfV+MjISB9uLUl63jEHelU9WVVPdY5vBIaTbD/myiRJ63LMgZ7kNUnSOT6rc81Hj/W6kqT16brKJckMcA6wPclB4NPAMEBVfQW4CPhIksPAb4CLq6o2rGJJ0qq6BnpV7enSfzVLyxolSQPkTlFJaoSBLkmNMNAlqREGuiQ1wkCXpEYY6JLUCANdkhphoEtSIwx0SWqEgS5JjTDQJakRBrokNcJAl6RGGOiS1AgDXZ
IaYaBLUiMMdElqhIEuSY0w0CWpEV0DPck1SR5Jcu8a/UnyxSQHktyT5K39L1OS1E0vT+hfBc47Qv/5wOmdz17gy8deliRpvboGelXdAjx2hCEXAtfVkluBk5Kc3K8CJUm96ccc+inAg8vOD3bafkeSvUnmk8wvLi724daSpOcd15eiVbWvqsaranxkZOR43lqSmtePQH8I2LHs/NROmyTpOOpHoO8HPtBZ7XI28ERVPdyH60qS1uGEbgOSzADnANuTHAQ+DQwDVNVXgBuBdwMHgF8Df7NRxUqS1tY10KtqT5f+Aj7at4okSUfFnaKS1AgDXZIaYaBLUiMMdElqhIEuSY0w0CWpEQa6JDXCQJekRhjoktQIA12SGmGgS1IjDHRJaoSBLkmNMNAlqREGuiQ1wkCXpEYY6JLUCANdkhphoEtSI3oK9CTnJflZkgNJPrVK/yVJFpPc1fl8qP+lSpKOpOuPRCcZAr4EvAs4CNyeZH9V3b9i6Der6rINqFGS1INentDPAg5U1QNV9SzwDeDCjS1LkrRevQT6KcCDy84PdtpWem+Se5Jcn2THahdKsjfJfJL5xcXFoyhXkrSWfr0U/U9gZ1X9KXAzcO1qg6pqX1WNV9X4yMhIn24tSYLeAv0hYPkT96mdthdU1aNV9Uzn9F+Bt/WnPElSr3oJ9NuB05O8LsnLgYuB/csHJDl52ekFwEL/SpQk9aLrKpeqOpzkMuD7wBBwTVXdl+RKYL6q9gMfS3IBcBh4DLhkA2uWJK0iVTWQG4+Pj9f8/PxA7i1JW1WSO6pqfLU+d4pKUiMMdElqhIEuSY0w0CWpEQa6JDXCQJekRhjoktQIA12SGmGgS1IjDHRJaoSBLkmNMNAlqREGuiQ1wkCXpEZ0/T506aUkye+0DeorpqX18gld6lgtzI/ULm02BrokNcJAl6RGGOiS1IieAj3JeUl+luRAkk+t0n9ikm92+m9LsrPfhUqSjqxroCcZAr4EnA+cAexJcsaKYRPA41X1euALwOf6Xagk6ch6eUI/CzhQVQ9U1bPAN4ALV4y5ELi2c3w98M64NECSjqte1qGfAjy47Pwg8Gdrjamqw0meAF4N/GL5oCR7gb0Ap5122lGWrJe0z7xqwy5dn/6D43/fzzyxMdfVS9Jx3VhUVfuAfQDj4+Pu1tD6bWAAHukflW4u0lbQy5TLQ8COZeendtpWHZPkBOBVwKP9KFCS1JteAv124PQkr0vycuBiYP+KMfuBD3aOLwJ+UD7SaItZ66+sf5W1VXSdcunMiV8GfB8YAq6pqvuSXAnMV9V+YBr4WpIDwGMshb605Rje2sp6mkOvqhuBG1e0XbHs+Gngff0tTZK0Hu4UlaRGGOiS1AgDXZIaYaBLUiMyqLf6SRaBnw/k5lJ321mx01naJF5bVSOrdQws0KXNLMl8VY0Pug5pPZxykaRGGOiS1AgDXVrdvkEXIK2Xc+iS1Aif0CWpEQa6JDXCQJeWSXJNkkeS3DvoWqT1MtClF/sqcN6gi5COhoEuLVNVt7D0nf7SlmOgS1IjDHRJaoSBLkmNMNAlqREGurRMkhngx8AbkxxMMjHomqReufVfkhrhE7okNcJAl6RGGOiS1AgDXZIaYaBLUiMMdElqhIEuSY34f+5umTA2mWbVAAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light",
"tags": []
},
"output_type": "display_data"
}
],
"source": [
"# Label the figure so it stands alone, and finish with plt.show() so the cell\n",
"# does not echo the dict of matplotlib artists that boxplot() returns\n",
"plt.boxplot(df['trip_duration'])\n",
"plt.title('Trip duration distribution')\n",
"plt.ylabel('trip_duration (seconds)')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "YMHAxF7h7W1m",
"outputId": "960a156e-0b34-4474-d63d-28a9bf32794a"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Instances with trip duration greater than 2092.0 are outliers as per Boxplot analysis.\n"
]
}
],
"source": [
"# Quartiles of trip_duration in one call, then the 1.5 * IQR boxplot fences\n",
"Q1, Q2, Q3 = np.percentile(df['trip_duration'], [25, 50, 75], interpolation = 'midpoint')\n",
"IQR = Q3 - Q1\n",
"low_lim, up_lim = Q1 - 1.5 * IQR, Q3 + 1.5 * IQR\n",
"message = \"Instances with trip duration greater than {} are outliers as per Boxplot analysis.\"\n",
"print(message.format(up_lim))"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "4igtTKF7hPu2"
},
"source": [
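"The boxplot rule (Q3 + 1.5 x IQR) above flags trips longer than 2092 seconds, but we use a more lenient cutoff and treat only trips of 5900 seconds or longer as outliers.\n",
"Trips shorter than 60 seconds (~1 min) are also implausible and are candidates for removal; note that the corresponding filter is currently commented out in the next cell. "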
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "sxsY-b-IGWFl"
},
"outputs": [],
"source": [
"df = df[(df.trip_duration < 5900)]\n",
"# df = df[(df.trip_duration > 60)]"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "KkVIECiehPu2"
},
"source": [
"Instances with passenger_count = 0 also need to be removed."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "YWyc3pQjhPu2"
},
"outputs": [],
"source": [
"df = df[(df.passenger_count > 0)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 320
},
"id": "TvMqWf8jT1ab",
"outputId": "7b434c36-cbbc-4245-86ce-8be38db62dfb"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>vendor_id</th>\n",
" <th>passenger_count</th>\n",
" <th>pickup_longitude</th>\n",
" <th>pickup_latitude</th>\n",
" <th>dropoff_longitude</th>\n",
" <th>dropoff_latitude</th>\n",
" <th>trip_duration</th>\n",
" <th>vism</th>\n",
" <th>fog</th>\n",
" <th>rain</th>\n",
" <th>snow</th>\n",
" <th>holiday_or_not</th>\n",
" <th>turns</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1.456018e+06</td>\n",
" <td>1.456018e+06</td>\n",
" <td>1.456018e+06</td>\n",
" <td>1.456018e+06</td>\n",
" <td>1.456018e+06</td>\n",
" <td>1.456018e+06</td>\n",
" <td>1.456018e+06</td>\n",
" <td>1.456018e+06</td>\n",
" <td>1.456018e+06</td>\n",
" <td>1.456018e+06</td>\n",
" <td>1.456018e+06</td>\n",
" <td>1.456018e+06</td>\n",
" <td>1.456018e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>1.534277e+00</td>\n",
" <td>1.664016e+00</td>\n",
" <td>-7.397352e+01</td>\n",
" <td>4.075094e+01</td>\n",
" <td>-7.397343e+01</td>\n",
" <td>4.075181e+01</td>\n",
" <td>8.348868e+02</td>\n",
" <td>1.467787e+01</td>\n",
" <td>6.518463e-03</td>\n",
" <td>9.605307e-02</td>\n",
" <td>2.387539e-02</td>\n",
" <td>1.868590e-02</td>\n",
" <td>7.543262e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>4.988239e-01</td>\n",
" <td>1.313631e+00</td>\n",
" <td>7.088238e-02</td>\n",
" <td>3.284235e-02</td>\n",
" <td>7.061578e-02</td>\n",
" <td>3.585719e-02</td>\n",
" <td>6.491411e+02</td>\n",
" <td>3.070585e+00</td>\n",
" <td>1.124494e-01</td>\n",
" <td>5.186673e-01</td>\n",
" <td>2.773547e-01</td>\n",
" <td>1.354132e-01</td>\n",
" <td>4.428224e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>1.000000e+00</td>\n",
" <td>1.000000e+00</td>\n",
" <td>-1.219333e+02</td>\n",
" <td>3.435970e+01</td>\n",
" <td>-1.219333e+02</td>\n",
" <td>3.218114e+01</td>\n",
" <td>1.000000e+00</td>\n",
" <td>4.000000e-01</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>2.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>1.000000e+00</td>\n",
" <td>1.000000e+00</td>\n",
" <td>-7.399187e+01</td>\n",
" <td>4.073737e+01</td>\n",
" <td>-7.399133e+01</td>\n",
" <td>4.073590e+01</td>\n",
" <td>3.970000e+02</td>\n",
" <td>1.450000e+01</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>5.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>2.000000e+00</td>\n",
" <td>1.000000e+00</td>\n",
" <td>-7.398174e+01</td>\n",
" <td>4.075411e+01</td>\n",
" <td>-7.397975e+01</td>\n",
" <td>4.075453e+01</td>\n",
" <td>6.610000e+02</td>\n",
" <td>1.610000e+01</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>6.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>2.000000e+00</td>\n",
" <td>2.000000e+00</td>\n",
" <td>-7.396735e+01</td>\n",
" <td>4.076836e+01</td>\n",
" <td>-7.396302e+01</td>\n",
" <td>4.076982e+01</td>\n",
" <td>1.072000e+03</td>\n",
" <td>1.610000e+01</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>9.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>2.000000e+00</td>\n",
" <td>9.000000e+00</td>\n",
" <td>-6.133553e+01</td>\n",
" <td>5.188108e+01</td>\n",
" <td>-6.133553e+01</td>\n",
" <td>4.392103e+01</td>\n",
" <td>5.999000e+03</td>\n",
" <td>1.610000e+01</td>\n",
" <td>4.000000e+00</td>\n",
" <td>7.000000e+00</td>\n",
" <td>6.000000e+00</td>\n",
" <td>1.000000e+00</td>\n",
" <td>4.600000e+01</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" vendor_id passenger_count ... holiday_or_not turns\n",
"count 1.456018e+06 1.456018e+06 ... 1.456018e+06 1.456018e+06\n",
"mean 1.534277e+00 1.664016e+00 ... 1.868590e-02 7.543262e+00\n",
"std 4.988239e-01 1.313631e+00 ... 1.354132e-01 4.428224e+00\n",
"min 1.000000e+00 1.000000e+00 ... 0.000000e+00 2.000000e+00\n",
"25% 1.000000e+00 1.000000e+00 ... 0.000000e+00 5.000000e+00\n",
"50% 2.000000e+00 1.000000e+00 ... 0.000000e+00 6.000000e+00\n",
"75% 2.000000e+00 2.000000e+00 ... 0.000000e+00 9.000000e+00\n",
"max 2.000000e+00 9.000000e+00 ... 1.000000e+00 4.600000e+01\n",
"\n",
"[8 rows x 13 columns]"
]
},
"execution_count": 22,
"metadata": {
"tags": []
},
"output_type": "execute_result"
}
],
"source": [
"df.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "nDl5UVbAhPu2"
},
"source": [
"# Feature Extraction"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "i2SF-MV1hr_z"
},
"outputs": [],
"source": [
"# Target is the trip duration; the feature frame is everything else minus the row identifier\n",
"y = df['trip_duration']\n",
"X = df.drop(columns=['trip_duration', 'id'])"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "UbsYDCWfsV6a"
},
"source": [
"### Encoding vendor_id"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "rD0BmhbJhPu3",
"outputId": "9ac5e61a-b6f9-4d46-dc72-9c027c65bd5b",
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Vendor list : [2 1]\n"
]
}
],
"source": [
"vendor_id_list = pd.unique(X['vendor_id'])\n",
"print(\"Vendor list :\", vendor_id_list)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "SYoAyI12hPu3"
},
"source": [
"There are two unique vendors in the dataset.\n",
"Since this is categorical data, we can perform one hot encoding on it."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "rWHK4tinhr_8",
"scrolled": true
},
"outputs": [],
"source": [
"# One-hot encode vendor_id (values 1 and 2) into the columns vendor_id_1 and vendor_id_2\n",
"encoded_vendor_id = pd.get_dummies(X['vendor_id'], prefix='vendor_id')\n",
"# Swap the raw column for its encoded counterpart in a single step\n",
"X = X.drop(columns=['vendor_id']).join(encoded_vendor_id)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "_ualwbX7yq6k"
},
"source": [
"### Encoding store_and_fwd_flag"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "GHWPtsgfhPu3",
"outputId": "6df7094f-595f-4f23-db83-6fa031173d1c"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Flag Values : ['N' 'Y']\n"
]
}
],
"source": [
"flag_values = pd.unique(X['store_and_fwd_flag'])\n",
"print(\"Flag Values :\", flag_values)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "R5LDruDZx7TM"
},
"outputs": [],
"source": [
"# One-hot encode store_and_fwd_flag (values 'N' and 'Y') into the columns flag_N and flag_Y\n",
"encoded_flag_id = pd.get_dummies(X['store_and_fwd_flag'], prefix='flag')\n",
"# Swap the raw column for its encoded counterpart in a single step\n",
"X = X.drop(columns=['store_and_fwd_flag']).join(encoded_flag_id)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "O1WUDPE1hPu3"
},
"source": [
"### Calculating distance related features"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "6GIJD3IGhsAA"
},
"outputs": [],
"source": [
"# Absolute pickup-to-dropoff coordinate deltas, in degrees\n",
"X['lat_diff'] = (X['pickup_latitude'] - X['dropoff_latitude']).abs()\n",
"X['long_diff'] = (X['pickup_longitude'] - X['dropoff_longitude']).abs()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "OhMPK8SZhPu3"
},
"outputs": [],
"source": [
"def get_euclidean_dist(p_lat, p_long, d_lat, d_long):\n",
"    \"\"\"Straight-line distance between pickup and dropoff in raw coordinate units.\n",
"\n",
"    Works element-wise on scalars or NumPy arrays; no conversion to km is applied.\n",
"    \"\"\"\n",
"    lat_delta = p_lat - d_lat\n",
"    long_delta = p_long - d_long\n",
"    return np.sqrt(np.power(lat_delta, 2) + np.power(long_delta, 2))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "pWybkDgQhPu3"
},
"outputs": [],
"source": [
"def get_haversine_dist(lat1, lng1, lat2, lng2):\n",
"    \"\"\"Haversine (great-circle) distance in kilometres between two points.\n",
"\n",
"    All four arguments are in degrees and may be scalars or NumPy arrays;\n",
"    they are converted to radians before the formula is applied.\n",
"    \"\"\"\n",
"    EARTH_RADIUS_KM = 6371  # average Earth radius, in km\n",
"    lat1, lng1, lat2, lng2 = (np.radians(v) for v in (lat1, lng1, lat2, lng2))\n",
"    half_dlat = (lat2 - lat1) * 0.5\n",
"    half_dlng = (lng2 - lng1) * 0.5\n",
"    # Haversine term: sin^2(dlat/2) + cos(lat1) * cos(lat2) * sin^2(dlng/2)\n",
"    d = np.sin(half_dlat) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(half_dlng) ** 2\n",
"    return 2 * EARTH_RADIUS_KM * np.arcsin(np.sqrt(d))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "ZBKo6Af6hPu3"
},
"outputs": [],
"source": [
"def get_manhattan_distance(lat1, lng1, lat2, lng2):\n",
"    \"\"\"Sum of two haversine legs that start at the first point.\n",
"\n",
"    One leg changes only the longitude (to lng2), the other only the latitude\n",
"    (to lat2); the result is in the same unit as get_haversine_dist.\n",
"    \"\"\"\n",
"    longitude_leg = get_haversine_dist(lat1, lng1, lat1, lng2)\n",
"    latitude_leg = get_haversine_dist(lat1, lng1, lat2, lng1)\n",
"    return longitude_leg + latitude_leg"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "ooqpy-bghPu3"
},
"outputs": [],
"source": [
"# Pull the coordinate columns out once as NumPy arrays, then derive the three distance features\n",
"pickup_lat = X['pickup_latitude'].to_numpy()\n",
"pickup_lng = X['pickup_longitude'].to_numpy()\n",
"dropoff_lat = X['dropoff_latitude'].to_numpy()\n",
"dropoff_lng = X['dropoff_longitude'].to_numpy()\n",
"\n",
"X['euclidean_dist'] = get_euclidean_dist(pickup_lat, pickup_lng, dropoff_lat, dropoff_lng)\n",
"X['haversine_dist'] = get_haversine_dist(pickup_lat, pickup_lng, dropoff_lat, dropoff_lng)\n",
"X['manhattan_dist'] = get_manhattan_distance(pickup_lat, pickup_lng, dropoff_lat, dropoff_lng)\n",
"# X['speed_haversine'] = X['manhattan_dist'] / y\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ON03y96nhPu3"
},
"source": [
"### Calculating time related features"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Rt3E99CkhsAP"
},
"outputs": [],
"source": [
"# Parse pickup timestamps; values that do not match the format become NaT instead of raising\n",
"pickup_timestamp_format = '%Y-%m-%d %H:%M:%S'\n",
"X['pickup_datetime'] = pd.to_datetime(X['pickup_datetime'], format=pickup_timestamp_format, errors='coerce')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "iBjPHG7m7g6v",
"outputId": "f3ac7131-c433-4863-9aac-d5feb77bf046"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:16: FutureWarning: Series.dt.weekofyear and Series.dt.week have been deprecated. Please use Series.dt.isocalendar().week instead.\n",
" app.launch_new_instance()\n"
]
}
],
"source": [
"# Calendar features derived from the parsed pickup timestamp\n",
"X['pickup_day_of_the_week'] = X['pickup_datetime'].dt.dayofweek\n",
"\n",
"X['pickup_hour'] = X['pickup_datetime'].dt.hour\n",
"\n",
"X['pickup_month'] = X['pickup_datetime'].dt.month\n",
"X['pickup_day_of_year'] = X['pickup_datetime'].dt.dayofyear\n",
"# Series.dt.weekofyear is deprecated (this cell emitted a FutureWarning for it) and is\n",
"# removed in pandas 2.0; isocalendar().week is the documented replacement.\n",
"# Note: it yields a nullable UInt32 column rather than int64.\n",
"X['pickup_week_of_year'] = X['pickup_datetime'].dt.isocalendar().week"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 383
},
"id": "Q5n8YUbKWQUU",
"outputId": "86e2808e-bfeb-4b97-eaa2-7d7b4381a1b6"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>passenger_count</th>\n",
" <th>pickup_longitude</th>\n",
" <th>pickup_latitude</th>\n",
" <th>dropoff_longitude</th>\n",
" <th>dropoff_latitude</th>\n",
" <th>vism</th>\n",
" <th>fog</th>\n",
" <th>rain</th>\n",
" <th>snow</th>\n",
" <th>holiday_or_not</th>\n",
" <th>turns</th>\n",
" <th>vendor_id_1</th>\n",
" <th>vendor_id_2</th>\n",
" <th>flag_N</th>\n",
" <th>flag_Y</th>\n",
" <th>lat_diff</th>\n",
" <th>long_diff</th>\n",
" <th>euclidean_dist</th>\n",
" <th>haversine_dist</th>\n",
" <th>manhattan_dist</th>\n",
" <th>pickup_day_of_the_week</th>\n",
" <th>pickup_hour</th>\n",
" <th>pickup_month</th>\n",
" <th>pickup_day_of_year</th>\n",
" <th>pickup_week_of_year</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>-73.982155</td>\n",
" <td>40.767937</td>\n",
" <td>-73.964630</td>\n",
" <td>40.765602</td>\n",
" <td>12.90</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>5.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.002335</td>\n",
" <td>0.017525</td>\n",
" <td>0.017680</td>\n",
" <td>1.498521</td>\n",
" <td>1.735433</td>\n",
" <td>0</td>\n",
" <td>17</td>\n",
" <td>3</td>\n",
" <td>74</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>-73.980415</td>\n",
" <td>40.738564</td>\n",
" <td>-73.999481</td>\n",
" <td>40.731152</td>\n",
" <td>16.10</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>6.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.007412</td>\n",
" <td>0.019066</td>\n",
" <td>0.020456</td>\n",
" <td>1.805507</td>\n",
" <td>2.430506</td>\n",
" <td>6</td>\n",
" <td>0</td>\n",
" <td>6</td>\n",
" <td>164</td>\n",
" <td>23</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>-73.979027</td>\n",
" <td>40.763939</td>\n",
" <td>-74.005333</td>\n",
" <td>40.710087</td>\n",
" <td>16.10</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>16.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.053852</td>\n",
" <td>0.026306</td>\n",
" <td>0.059934</td>\n",
" <td>6.385098</td>\n",
" <td>8.203575</td>\n",
" <td>1</td>\n",
" <td>11</td>\n",
" <td>1</td>\n",
" <td>19</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>-74.010040</td>\n",
" <td>40.719971</td>\n",
" <td>-74.012268</td>\n",
" <td>40.706718</td>\n",
" <td>2.64</td>\n",
" <td>0.0</td>\n",
" <td>3.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>4.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.013252</td>\n",
" <td>0.002228</td>\n",
" <td>0.013438</td>\n",
" <td>1.485498</td>\n",
" <td>1.661331</td>\n",
" <td>2</td>\n",
" <td>19</td>\n",
" <td>4</td>\n",
" <td>97</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" <td>-73.973053</td>\n",
" <td>40.793209</td>\n",
" <td>-73.972923</td>\n",
" <td>40.782520</td>\n",
" <td>16.10</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>5.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.010689</td>\n",
" <td>0.000130</td>\n",
" <td>0.010690</td>\n",
" <td>1.188588</td>\n",
" <td>1.199457</td>\n",
" <td>5</td>\n",
" <td>13</td>\n",
" <td>3</td>\n",
" <td>86</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>6</td>\n",
" <td>-73.982857</td>\n",
" <td>40.742195</td>\n",
" <td>-73.992081</td>\n",
" <td>40.749184</td>\n",
" <td>16.10</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>5.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.006989</td>\n",
" <td>0.009224</td>\n",
" <td>0.011572</td>\n",
" <td>1.098942</td>\n",
" <td>1.554180</td>\n",
" <td>5</td>\n",
" <td>22</td>\n",
" <td>1</td>\n",
" <td>30</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>4</td>\n",
" <td>-73.969017</td>\n",
" <td>40.757839</td>\n",
" <td>-73.957405</td>\n",
" <td>40.765896</td>\n",
" <td>16.10</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>5.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.008057</td>\n",
" <td>0.011612</td>\n",
" <td>0.014133</td>\n",
" <td>1.326279</td>\n",
" <td>1.873902</td>\n",
" <td>4</td>\n",
" <td>22</td>\n",
" <td>6</td>\n",
" <td>169</td>\n",
" <td>24</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>1</td>\n",
" <td>-73.969276</td>\n",
" <td>40.797779</td>\n",
" <td>-73.922470</td>\n",
" <td>40.760559</td>\n",
" <td>16.10</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>17.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.037220</td>\n",
" <td>0.046806</td>\n",
" <td>0.059801</td>\n",
" <td>5.714981</td>\n",
" <td>8.078684</td>\n",
" <td>5</td>\n",
" <td>7</td>\n",
" <td>5</td>\n",
" <td>142</td>\n",
" <td>20</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>1</td>\n",
" <td>-73.999481</td>\n",
" <td>40.738400</td>\n",
" <td>-73.985786</td>\n",
" <td>40.732815</td>\n",
" <td>12.90</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>2.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.005585</td>\n",
" <td>0.013695</td>\n",
" <td>0.014790</td>\n",
" <td>1.310353</td>\n",
" <td>1.774804</td>\n",
" <td>4</td>\n",
" <td>23</td>\n",
" <td>5</td>\n",
" <td>148</td>\n",
" <td>21</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>1</td>\n",
" <td>-73.981049</td>\n",
" <td>40.744339</td>\n",
" <td>-73.973000</td>\n",
" <td>40.789989</td>\n",
" <td>14.50</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>13.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.045650</td>\n",
" <td>0.008049</td>\n",
" <td>0.046355</td>\n",
" <td>5.121162</td>\n",
" <td>5.754187</td>\n",
" <td>3</td>\n",
" <td>21</td>\n",
" <td>3</td>\n",
" <td>70</td>\n",
" <td>10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" passenger_count pickup_longitude ... pickup_day_of_year pickup_week_of_year\n",
"0 1 -73.982155 ... 74 11\n",
"1 1 -73.980415 ... 164 23\n",
"2 1 -73.979027 ... 19 3\n",
"3 1 -74.010040 ... 97 14\n",
"4 1 -73.973053 ... 86 12\n",
"5 6 -73.982857 ... 30 4\n",
"6 4 -73.969017 ... 169 24\n",
"7 1 -73.969276 ... 142 20\n",
"8 1 -73.999481 ... 148 21\n",
"9 1 -73.981049 ... 70 10\n",
"\n",
"[10 rows x 25 columns]"
]
},
"execution_count": 35,
"metadata": {
"tags": []
},
"output_type": "execute_result"
}
],
"source": [
"# Remove both raw timestamp columns in one call and preview the first ten rows\n",
"X = X.drop(columns=['pickup_datetime', 'dropoff_datetime'])\n",
"X.head(10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "JsXogqJlWQUW",
"outputId": "538f7421-f3c8-4433-e982-fe5235b3e750",
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"List of features : ['passenger_count' 'pickup_longitude' 'pickup_latitude'\n",
" 'dropoff_longitude' 'dropoff_latitude' 'vism' 'fog' 'rain' 'snow'\n",
" 'holiday_or_not' 'turns' 'vendor_id_1' 'vendor_id_2' 'flag_N' 'flag_Y'\n",
" 'lat_diff' 'long_diff' 'euclidean_dist' 'haversine_dist' 'manhattan_dist'\n",
" 'pickup_day_of_the_week' 'pickup_hour' 'pickup_month'\n",
" 'pickup_day_of_year' 'pickup_week_of_year']\n"
]
}
],
"source": [
"# Keep the feature names as an array and show them\n",
"feature_list = X.columns.to_numpy()\n",
"print(f'List of features : {feature_list}')"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "bQMrzdyGhPu4"
},
"source": [
"# Training"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "X3cvffRoUYJE"
},
"source": [
"## Random Forest Regression"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "eEEy6Yqj7g6v"
},
"outputs": [],
"source": [
"# Model inputs: every feature except the flag_Y / flag_N columns\n",
"X_temp = X.drop(columns=['flag_Y', 'flag_N'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "LRtGHpn_7g6x"
},
"outputs": [],
"source": [
"# Fixed seed so the 80/20 hold-out split (and any score computed on it) is reproducible across runs\n",
"X_train, X_test, y_train, y_test = train_test_split(X_temp, y, test_size=0.2, random_state=42)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "cKgvCqtVa1KB",
"outputId": "c461b3dc-9de3-40d9-c082-9a8d0a0c2be5"
},
"outputs": [
{
"data": {
"text/plain": [
"RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',\n",
" max_depth=None, max_features='auto', max_leaf_nodes=None,\n",
" max_samples=None, min_impurity_decrease=0.0,\n",
" min_impurity_split=None, min_samples_leaf=1,\n",
" min_samples_split=2, min_weight_fraction_leaf=0.0,\n",
" n_estimators=80, n_jobs=-1, oob_score=False,\n",
" random_state=42, verbose=0, warm_start=False)"
]
},
"execution_count": 39,
"metadata": {
"tags": []
},
"output_type": "execute_result"
}
],
"source": [
"# Final model: an 80-tree forest fitted on the full feature matrix (no rows held out)\n",
"regr = RandomForestRegressor(\n",
"    n_estimators=80,\n",
"    n_jobs=-1,\n",
"    random_state=42,\n",
")\n",
"regr.fit(X_temp, y)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "PVHcFd-NWQUY",
"outputId": "0967d9fa-bd6b-4fc1-db47-c070b079a274",
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"0.3396782667155511"
]
},
"execution_count": 30,
"metadata": {
"tags": []
},
"output_type": "execute_result"
}
],
"source": [
"# Hold-out evaluation (RMSLE). A separate model is fitted on the training split only,\n",
"# because `regr` is fitted on all of X_temp and has therefore already seen the X_test rows.\n",
"from sklearn.metrics import mean_squared_log_error\n",
"\n",
"holdout_regr = RandomForestRegressor(n_estimators=80, random_state=42, n_jobs=-1)\n",
"holdout_regr.fit(X_train, y_train)\n",
"# Round as is done for the submission so the score reflects the values actually written out\n",
"holdout_pred = np.round(holdout_regr.predict(X_test))\n",
"np.sqrt(mean_squared_log_error(y_test, holdout_pred))"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "reocx_zPmchc"
},
"source": [
"# Feature Extraction on test data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "gyqmdO6IUln3",
"outputId": "50c64ae8-2097-41ff-8a28-47f46cc80b01"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Archive: extended_test.csv.zip\n",
"replace extended_test.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: y\n",
" inflating: extended_test.csv \n",
"replace __MACOSX/._extended_test.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: y\n",
" inflating: __MACOSX/._extended_test.csv \n"
]
}
],
"source": [
"# -o overwrites already-extracted files without prompting, so re-running the cell never blocks on input\n",
"!unzip -o 'extended_test.csv.zip'\n",
"df_test = pd.read_csv('extended_test.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 338
},
"id": "wulg-B6jC-pQ",
"outputId": "278ae1fc-bdab-4d3c-b43a-295f19170c03"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Unnamed: 0</th>\n",
" <th>Unnamed: 0.1</th>\n",
" <th>vendor_id</th>\n",
" <th>passenger_count</th>\n",
" <th>pickup_longitude</th>\n",
" <th>pickup_latitude</th>\n",
" <th>dropoff_longitude</th>\n",
" <th>dropoff_latitude</th>\n",
" <th>holiday_or_not</th>\n",
" <th>number_of_steps</th>\n",
" <th>visi</th>\n",
" <th>vism</th>\n",
" <th>fog</th>\n",
" <th>rain</th>\n",
" <th>snow</th>\n",
" <th>hail</th>\n",
" <th>thunder</th>\n",
" <th>tornado</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>625134.000000</td>\n",
" <td>625134.000000</td>\n",
" <td>625134.000000</td>\n",
" <td>625134.000000</td>\n",
" <td>625134.000000</td>\n",
" <td>625134.000000</td>\n",
" <td>625134.000000</td>\n",
" <td>625134.000000</td>\n",
" <td>625134.000000</td>\n",
" <td>625134.000000</td>\n",
" <td>602975.000000</td>\n",
" <td>602975.000000</td>\n",
" <td>625134.000000</td>\n",
" <td>625134.000000</td>\n",
" <td>625134.000000</td>\n",
" <td>625134.0</td>\n",
" <td>625134.0</td>\n",
" <td>625134.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>312566.500000</td>\n",
" <td>312566.500000</td>\n",
" <td>1.534884</td>\n",
" <td>1.661765</td>\n",
" <td>-73.973614</td>\n",
" <td>40.750927</td>\n",
" <td>-73.973458</td>\n",
" <td>40.751816</td>\n",
" <td>0.018710</td>\n",
" <td>7.545392</td>\n",
" <td>9.081615</td>\n",
" <td>14.622785</td>\n",
" <td>0.006514</td>\n",
" <td>0.094818</td>\n",
" <td>0.024284</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>180460.785927</td>\n",
" <td>180460.785927</td>\n",
" <td>0.498782</td>\n",
" <td>1.311293</td>\n",
" <td>0.073389</td>\n",
" <td>0.029848</td>\n",
" <td>0.072565</td>\n",
" <td>0.035824</td>\n",
" <td>0.135497</td>\n",
" <td>4.435818</td>\n",
" <td>1.931477</td>\n",
" <td>3.113787</td>\n",
" <td>0.111340</td>\n",
" <td>0.514837</td>\n",
" <td>0.280462</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-121.933128</td>\n",
" <td>37.389587</td>\n",
" <td>-121.933327</td>\n",
" <td>36.601322</td>\n",
" <td>0.000000</td>\n",
" <td>2.000000</td>\n",
" <td>0.200000</td>\n",
" <td>0.400000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>156283.250000</td>\n",
" <td>156283.250000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>-73.991852</td>\n",
" <td>40.737392</td>\n",
" <td>-73.991318</td>\n",
" <td>40.736000</td>\n",
" <td>0.000000</td>\n",
" <td>5.000000</td>\n",
" <td>9.000000</td>\n",
" <td>14.500000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>312566.500000</td>\n",
" <td>312566.500000</td>\n",
" <td>2.000000</td>\n",
" <td>1.000000</td>\n",
" <td>-73.981743</td>\n",
" <td>40.754093</td>\n",
" <td>-73.979774</td>\n",
" <td>40.754543</td>\n",
" <td>0.000000</td>\n",
" <td>6.000000</td>\n",
" <td>10.000000</td>\n",
" <td>16.100000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>468849.750000</td>\n",
" <td>468849.750000</td>\n",
" <td>2.000000</td>\n",
" <td>2.000000</td>\n",
" <td>-73.967400</td>\n",
" <td>40.768394</td>\n",
" <td>-73.963013</td>\n",
" <td>40.769852</td>\n",
" <td>0.000000</td>\n",
" <td>9.000000</td>\n",
" <td>10.000000</td>\n",
" <td>16.100000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>625133.000000</td>\n",
" <td>625133.000000</td>\n",
" <td>2.000000</td>\n",
" <td>9.000000</td>\n",
" <td>-69.248917</td>\n",
" <td>42.814938</td>\n",
" <td>-67.496796</td>\n",
" <td>48.857597</td>\n",
" <td>1.000000</td>\n",
" <td>50.000000</td>\n",
" <td>10.000000</td>\n",
" <td>16.100000</td>\n",
" <td>4.000000</td>\n",
" <td>7.000000</td>\n",
" <td>6.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Unnamed: 0 Unnamed: 0.1 ... thunder tornado\n",
"count 625134.000000 625134.000000 ... 625134.0 625134.0\n",
"mean 312566.500000 312566.500000 ... 0.0 0.0\n",
"std 180460.785927 180460.785927 ... 0.0 0.0\n",
"min 0.000000 0.000000 ... 0.0 0.0\n",
"25% 156283.250000 156283.250000 ... 0.0 0.0\n",
"50% 312566.500000 312566.500000 ... 0.0 0.0\n",
"75% 468849.750000 468849.750000 ... 0.0 0.0\n",
"max 625133.000000 625133.000000 ... 0.0 0.0\n",
"\n",
"[8 rows x 18 columns]"
]
},
"execution_count": 41,
"metadata": {
"tags": []
},
"output_type": "execute_result"
}
],
"source": [
"# Summary statistics of the raw test set; a count below the row total marks a column with missing values\n",
"df_test.describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "xvUraj46DGNs",
"outputId": "71ebca0d-b1f4-40c3-e6aa-a68fd7b5231f"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"id 0\n",
"vendor_id 0\n",
"pickup_datetime 0\n",
"passenger_count 0\n",
"pickup_longitude 0\n",
"pickup_latitude 0\n",
"dropoff_longitude 0\n",
"dropoff_latitude 0\n",
"store_and_fwd_flag 0\n",
"pickup_datetime_temp 0\n",
"holiday_or_not 0\n",
"number_of_steps 0\n",
"vism 22159\n",
"fog 0\n",
"rain 0\n",
"snow 0\n",
"dtype: int64\n"
]
}
],
"source": [
"# Drop the leftover index columns and the weather columns that are not used as features,\n",
"# then report how many missing values remain per column\n",
"unused_columns = ['Unnamed: 0', 'Unnamed: 0.1', 'hail', 'thunder', 'tornado', 'visi']\n",
"df_test = df_test.drop(columns=unused_columns)\n",
"missing_val_count_by_column = df_test.isna().sum()\n",
"print(missing_val_count_by_column)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "7_LUhtnwMMNB",
"outputId": "3046c0d0-feb0-4ddb-d2d4-d7ec2f2dd5fb"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"id 0\n",
"vendor_id 0\n",
"pickup_datetime 0\n",
"passenger_count 0\n",
"pickup_longitude 0\n",
"pickup_latitude 0\n",
"dropoff_longitude 0\n",
"dropoff_latitude 0\n",
"store_and_fwd_flag 0\n",
"pickup_datetime_temp 0\n",
"holiday_or_not 0\n",
"number_of_steps 0\n",
"vism 0\n",
"fog 0\n",
"rain 0\n",
"snow 0\n",
"dtype: int64\n"
]
}
],
"source": [
"# Fill missing vism with 16.1, which describe() reports as both the median and the maximum of the column\n",
"values = dict(vism=16.1)\n",
"df_test = df_test.fillna(values)\n",
"missing_val_count_by_column = df_test.isna().sum()\n",
"print(missing_val_count_by_column)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 320
},
"id": "kt-tZKzLMdFe",
"outputId": "bc422a0f-a8c8-40f8-bbe3-56fe7b18d3ad"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>vendor_id</th>\n",
" <th>passenger_count</th>\n",
" <th>pickup_longitude</th>\n",
" <th>pickup_latitude</th>\n",
" <th>dropoff_longitude</th>\n",
" <th>dropoff_latitude</th>\n",
" <th>holiday_or_not</th>\n",
" <th>vism</th>\n",
" <th>fog</th>\n",
" <th>rain</th>\n",
" <th>snow</th>\n",
" <th>turns</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>625134.000000</td>\n",
" <td>625134.000000</td>\n",
" <td>625134.000000</td>\n",
" <td>625134.000000</td>\n",
" <td>625134.000000</td>\n",
" <td>625134.000000</td>\n",
" <td>625134.000000</td>\n",
" <td>625134.000000</td>\n",
" <td>625134.000000</td>\n",
" <td>625134.000000</td>\n",
" <td>625134.000000</td>\n",
" <td>625134.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>1.534884</td>\n",
" <td>1.661765</td>\n",
" <td>-73.973614</td>\n",
" <td>40.750927</td>\n",
" <td>-73.973458</td>\n",
" <td>40.751816</td>\n",
" <td>0.018710</td>\n",
" <td>14.675148</td>\n",
" <td>0.006514</td>\n",
" <td>0.094818</td>\n",
" <td>0.024284</td>\n",
" <td>7.545392</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>0.498782</td>\n",
" <td>1.311293</td>\n",
" <td>0.073389</td>\n",
" <td>0.029848</td>\n",
" <td>0.072565</td>\n",
" <td>0.035824</td>\n",
" <td>0.135497</td>\n",
" <td>3.070276</td>\n",
" <td>0.111340</td>\n",
" <td>0.514837</td>\n",
" <td>0.280462</td>\n",
" <td>4.435818</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>1.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-121.933128</td>\n",
" <td>37.389587</td>\n",
" <td>-121.933327</td>\n",
" <td>36.601322</td>\n",
" <td>0.000000</td>\n",
" <td>0.400000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>2.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>-73.991852</td>\n",
" <td>40.737392</td>\n",
" <td>-73.991318</td>\n",
" <td>40.736000</td>\n",
" <td>0.000000</td>\n",
" <td>14.500000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>5.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>2.000000</td>\n",
" <td>1.000000</td>\n",
" <td>-73.981743</td>\n",
" <td>40.754093</td>\n",
" <td>-73.979774</td>\n",
" <td>40.754543</td>\n",
" <td>0.000000</td>\n",
" <td>16.100000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>2.000000</td>\n",
" <td>2.000000</td>\n",
" <td>-73.967400</td>\n",
" <td>40.768394</td>\n",
" <td>-73.963013</td>\n",
" <td>40.769852</td>\n",
" <td>0.000000</td>\n",
" <td>16.100000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>9.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>2.000000</td>\n",
" <td>9.000000</td>\n",
" <td>-69.248917</td>\n",
" <td>42.814938</td>\n",
" <td>-67.496796</td>\n",
" <td>48.857597</td>\n",
" <td>1.000000</td>\n",
" <td>16.100000</td>\n",
" <td>4.000000</td>\n",
" <td>7.000000</td>\n",
" <td>6.000000</td>\n",
" <td>50.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" vendor_id passenger_count ... snow turns\n",
"count 625134.000000 625134.000000 ... 625134.000000 625134.000000\n",
"mean 1.534884 1.661765 ... 0.024284 7.545392\n",
"std 0.498782 1.311293 ... 0.280462 4.435818\n",
"min 1.000000 0.000000 ... 0.000000 2.000000\n",
"25% 1.000000 1.000000 ... 0.000000 5.000000\n",
"50% 2.000000 1.000000 ... 0.000000 6.000000\n",
"75% 2.000000 2.000000 ... 0.000000 9.000000\n",
"max 2.000000 9.000000 ... 6.000000 50.000000\n",
"\n",
"[8 rows x 12 columns]"
]
},
"execution_count": 44,
"metadata": {
"tags": []
},
"output_type": "execute_result"
}
],
"source": [
"# Expose number_of_steps under the name 'turns' (appended as the last column) and drop the original\n",
"df_test = df_test.assign(turns=df_test['number_of_steps']).drop(columns='number_of_steps')\n",
"df_test.describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Oe62ewNmUzxu",
"outputId": "b1d61f02-8106-4728-ca54-1cb54e609691"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:34: FutureWarning: Series.dt.weekofyear and Series.dt.week have been deprecated. Please use Series.dt.isocalendar().week instead.\n"
]
}
],
"source": [
"# Build the test feature matrix: one-hot encodings, coordinate differences, distances and calendar features\n",
"Xt = df_test.drop('id', axis=1)\n",
"\n",
"# One-hot encode vendor_id = {1, 2} into vendor_id_1 and vendor_id_2\n",
"encoded_vendor_id = pd.get_dummies(Xt['vendor_id'], prefix='vendor_id')\n",
"Xt = Xt.drop('vendor_id', axis=1).join(encoded_vendor_id)\n",
"\n",
"# One-hot encode store_and_fwd_flag = {N, Y} into flag_N and flag_Y\n",
"encoded_flag_id = pd.get_dummies(Xt['store_and_fwd_flag'], prefix='flag')\n",
"Xt = Xt.drop('store_and_fwd_flag', axis=1).join(encoded_flag_id)\n",
"\n",
"Xt['lat_diff'] = (Xt['pickup_latitude'] - Xt['dropoff_latitude']).abs()\n",
"Xt['long_diff'] = (Xt['pickup_longitude'] - Xt['dropoff_longitude']).abs()\n",
"\n",
"# Extract the coordinate arrays once; every distance helper receives them in the same\n",
"# (pickup lat, pickup lon, dropoff lat, dropoff lon) order\n",
"trip_coords = [\n",
"    Xt[col].to_numpy()\n",
"    for col in ('pickup_latitude', 'pickup_longitude', 'dropoff_latitude', 'dropoff_longitude')\n",
"]\n",
"Xt['euclidean_dist'] = get_euclidean_dist(*trip_coords)\n",
"Xt['haversine_dist'] = get_haversine_dist(*trip_coords)\n",
"Xt['manhattan_dist'] = get_manhattan_distance(*trip_coords)\n",
"\n",
"# Unparsable timestamps become NaT (errors='coerce') instead of raising\n",
"Xt['pickup_datetime'] = pd.to_datetime(\n",
"    Xt['pickup_datetime'],\n",
"    format='%Y-%m-%d %H:%M:%S',\n",
"    errors='coerce',\n",
")\n",
"pickup_dt = Xt['pickup_datetime'].dt\n",
"\n",
"Xt['pickup_day_of_the_week'] = pickup_dt.dayofweek\n",
"Xt['pickup_hour'] = pickup_dt.hour\n",
"Xt['pickup_month'] = pickup_dt.month\n",
"Xt['pickup_day_of_year'] = pickup_dt.dayofyear\n",
"# Series.dt.weekofyear is deprecated; isocalendar().week is its replacement. It returns a nullable\n",
"# UInt32 column, so cast to float64 to keep NaN for any timestamp that failed to parse.\n",
"Xt['pickup_week_of_year'] = pickup_dt.isocalendar().week.astype('float64')\n",
"\n",
"# The raw timestamp is not a model input once the calendar features exist\n",
"Xt = Xt.drop('pickup_datetime', axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "hGR1Rw0xM2pQ"
},
"outputs": [],
"source": [
"# Remove the columns that are not model inputs: both flag dummies and the auxiliary timestamp column\n",
"Xt_temp = Xt.drop(columns=['flag_Y', 'flag_N', 'pickup_datetime_temp'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "rO_NDhHkremd"
},
"outputs": [],
"source": [
"# Index by X_temp.columns so the test features are passed in exactly the order used for fitting.\n",
"# Xt_temp holds holiday_or_not *before* vism/fog/rain/snow, whereas X_temp holds it after them;\n",
"# scikit-learn does not realign DataFrame columns by name, so the order must be fixed here.\n",
"yt = regr.predict(Xt_temp[X_temp.columns])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "e3MjSMbGss-S",
"outputId": "4ab38d0e-d537-441e-c790-c388c4ab86ce"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ 934. 630. 402. ... 1692. 2181. 1219.]\n"
]
}
],
"source": [
"# Round each prediction to the nearest whole number before export\n",
"ytfinal = yt.round()\n",
"print(ytfinal)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "jfpYtSeVs-l5"
},
"outputs": [],
"source": [
"# Attach the integer predictions to the test frame and write the two-column submission file\n",
"df_test['trip_duration'] = ytfinal.astype(int)\n",
"df_test[['id', 'trip_duration']].to_csv('rf_nd_80est.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "FrCAAhDV78uc",
"outputId": "0415a2d2-8294-4bb1-b6ca-7ecc54b6d0cd"
},
"outputs": [
{
"data": {
"text/plain": [
"array([4.45450304e-03, 3.00136050e-02, 2.53773546e-02, 2.97859640e-02,\n",
" 3.83625907e-02, 4.72717568e-03, 9.95669094e-05, 1.05654259e-03,\n",
" 1.78983442e-04, 2.38283901e-03, 1.81561447e-02, 1.33713834e-03,\n",
" 1.38256928e-03, 1.66892310e-02, 2.09224373e-02, 2.32449351e-01,\n",
" 4.34095134e-01, 1.31644472e-02, 2.70706945e-02, 7.00947039e-02,\n",
" 2.13636231e-03, 1.96788037e-02, 6.38385796e-03])"
]
},
"execution_count": 55,
"metadata": {
"tags": []
},
"output_type": "execute_result"
}
],
"source": [
"# Label each importance with the training column it belongs to and sort, so the ranking is readable\n",
"pd.Series(regr.feature_importances_, index=X_temp.columns).sort_values(ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 282
},
"id": "6ErJA2PH6lub",
"outputId": "55fdcaea-1093-4a57-d92e-86aea26c8a60"
},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7f62e5f11470>"
]
},
"execution_count": 56,
"metadata": {
"tags": []
},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAd8AAAD4CAYAAACt3uxiAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3dedyVVb3//9dbnFAUNawcMpTICRUFLCccSm0w00LNIcNM05z6mZanQdEmh451zHnEHEm/WiQ5hSCIAzIPKlpCmVpqKYYoJn5+f6y15WK7p/vmnu/38/Hgwd7XtNa+7JzFuq613ksRgZmZmbWdldq7AmZmZt2NG18zM7M25sbXzMysjbnxNTMza2NufM3MzNrYyu1dAev4+vTpE3379m3vapiZdSpTp059JSLWr7TPja/V1bdvX6ZMmdLe1TAz61Qk/bXaPj927mIknSzpSUk3tXddzMysMvd8u55vAZ+OiL+31AVnP7+wpS5lZma459ulSLoc2Ay4W9J3JP1O0ixJj0raNh+zvqT7Jc2VdLWkv0rq0741NzPrXtz4diERcRzwArAn0BeYHhHbAt8HfpMPOwt4ICK2Bm4HNql0LUnHSpoiacrSxe75mpm1JDe+XdeuwA0AEfEA8AFJa+ftt+bt9wCvVjo5Iq6MiMERMbjHGr3bqMpmZt2DG1+ra5uN3PiambUkN75d10TgcABJewCvRMTrwCTg4Lx9H2Dd9qqgmVl35dHOXdcI4FpJs4DFwNfy9rOBWyR9FXgE+Afwn3apoZlZN+XGt4uJiL6FrwdUOGQhsG9EvCNpJ2BIRCxpk8qZmRngxrc72gT4raSVgLeBY9q5PmZm3Y4b324mIp4Btm/KOQ7ZMDNrWR5w1QlIWlPSGEkzJc2RdIikBZLOljRN0mxJW+Rj16sSrjFb0jpK/iXpyLz9N5L2bs/fZ2bW3bjx7Rw+A7wQEdtFxADgnrz9lYjYAbgMOC1vO5vK4RqTgF2ArYFngd3y9p2Ah1v/J5iZWYkb385hNrC3pPMk7RYRpefAd+S/p5ISraB6uMZEYGj+cxmwjaSNgFcj4o3yAp1wZWbWetz4dgIR8TSwA6kR/omkM/Ou0ijlpdR/fz+B1NvdDRgPvAwMIzXKlcp0wpWZWStx49sJSNoQWBwRNwIXkBriaiqGa0TEc0AfoH9EPAs8RHpUPaFe+U64MjNrWR7t3DlsA1wg6V3gv8DxpEURKhlB5XANgMeAHvnzRODnpEbYzMzakCKivetgHdzgwYNjypQp7V0NM7NORdLUiBhcaZ8fO5uZmbUxN77dhKSrJW3V3vUwMzO/8+1SJIn0KuHd8n0R8Y3mXtcJV2ZmLcs9305OUl9J8yT9BpgDXJPn586VdHbhuPGSBufPiyT9NCdmPSrpQ+1VfzOz7siNb9fQH7g0IrYGvpNf8G8L7F6KlyyzJvBoRGxHmmr0vsUVHLJhZtZ63Ph2DX+NiEfz54MlTQOmk6IkK73nfRu4K38upmO9xyEbZmatx+98u4Y3ACRtSgrOGBIRr0oaCaxe4fj/xrI5ZnXTsRyyYWbWstzz7VrWJjXEC/N73M+2c33MzKwC93y7kIiYKWk68BTwHGklIzMz62CccGV1OeHKzKzpnHBlZmbWgbjxbUN5Tu6cFrjOhpIqLqxQnM9bZf9PJT0naVGj5Tlkw8ysZbnx7cAkVXwnHxEvRMSwZl72D8COza+VmZmtKA+4qkLSucBzEXFJ/j4CWAQIOBhYDbgzIs6S1Be4m7Q8387A88AXI+JNSYOAa/Nl7ytcf3XgMmAw8A5wakSMkzQc+BLQi7T83+4V6tYXuCsiBkjqCVwHbEcaaNWz1u8qzQdOSZQ1f/+xwLEAPdZev+axZmbWNO75VjeK1MiWHAy8TEqT2hEYCAySNDTv7w9cklOmXgO+nLdfB5yU06SKTgAiIrYBDgWuzw0ywA7AsIh4X8NbwfHA4o
jYEjgLGNSE31iVQzbMzFqPG98qImI68MH8fnU74FXSovb7kNKjpgFbkBpdgPkRMSN/ngr0lbQOsE5ETMjbbygUsStwYy7rKeCvwMfzvvsj4t8NVnVo4TqzgFlN+qFmZtbm/Ni5ttuAYcCHST3hjwI/j4grigflx8BLCpuWUufxbx1vrMC5Lc4JV2ZmLcs939pGAV8hNcC3AfcCX5fUC0DSRpI+WO3kiHgNeE3SrnnT4YXdE0vfJX0c2ASY14w6TgAOy9cZQFpQwczMOjA3vjVExFxgLeD5iHgxIu4DbgYekTQbuD3vr+Uo4BJJM0iDtUouBVbK1xkFDI+IJZUuUMdlQC9JTwLnkB55VyXpfEl/B9aQ9Pc8kMzMzNqQE66sLidcmZk1nROuzMzMOhA3vm2oqQlXkraRNKPsz2ONJFzl48rPHSJpjKSnJM3Nc5nrcsKVmVnL8mjnju3JiBhYZV/NhKuI+ET5NklrAL/IYR6rAmMlfTYi7m6BupqZWYPc+FbRFROuImIxMC5/flvSNGDjKr/fCVdmZq3Ej52r69IJVzkA5AvA2Er7nXBlZtZ63PhW0ZUTrvKCDbcAF0XEs/WOd8iGmVnL8mPn2rpqwtWVwDMR8atWLsfMzCpwz7e2LpdwJeknQG/g280oy8zMWoB7vjVExFxJ7yVcAS9K2pKUcAVpANYRpJ5uNUcB10oKCgOuSAlXl+WEq3fICVf1lvqr4DLgupxw9SQ1Eq4kbQz8gDQwa1ou6+KIuLqphZqZWfM54crqcsKVmVnTOeGqQZKulrRVjf0jJJ3WlnVaUZIGSvpc4XuTf8Ps5xfS94wxLV85M7Nuyo+dCyLiG+1dhyJJ27D8CGmAJZUCNCqc+xhpLvJ6QE9JzwNfbflamplZU3XLnm+OeXxK0k2SnpR0u6Q1StGM+ZjPSJomaaak982FlXSMpLsl9ZS0qLB9mKSR+fNISZdLmiLpaUn71ajTcEm/k3S/pAWSTgT2BgJ4C9grp119U9KjkmZJulPSuvn88ZLOkzRZ0tPAaaT5yEWlXv1W+fhnJZ1cpT7H5npPWbrY8ZJmZi2pWza+2ebApTmc4nXgW6UdktYHrgK+nMMxDiqemBvG/YADIuLNOuX0JTWCnwcuLwRpVDKAlG41BPgpKTxje+AR4Mh8zG+A70XEtsBsUrBGycoRsSNpJPNZEfE2cCYwKiIGRsSofNwWwL65XmdJWqW8Ig7ZMDNrPd258X0uIiblzzeSQi9KPglMiIj5AGWBF0cCnyUlUDWy/u5vI+LdiHgGeJbU8FUzLiL+ExEvAwuBP+Tts0mhHb1JoR0P5u3Xk0I2Su7If08lNfrVjImIJRHxCvAS8KEGfoeZmbWQ7tz4lg/zbnTY92xSw1bMRC6eW96zbUo5xcb83cL3d2ns/Xzp+KV1ji8PBKl57W026s2Ccz/fQPFmZtaI7tz4biJpp/z5MNKiCCWPAkMlbQogab3CvunAN4HRkjbM2/4paUtJKwEHlpVzkKSVJPUDNqN5QRoARMRC4FVJu+VNXwUerHEKwH+AtZpbppmZtbzu3PjOA07I4RTrksIqAMiPfY8F7pA0k5R0RWH/Q6QBTWMk9QHOAO4CHgZeLCvnb8Bk0qpHx0XEWytY768BF0iaRVrc4Zw6x48jDbCaIemQFSzbzMxaQLcM2SguydfK5YzM5VRc+L6zcMiGmVnTOWTDVohDNszMWla3aXzz3N45ABGxoF6vN8/RHZY/V0y+ynNzL652jYgYXt7rlbRvfgRc/HNn835V80laR9K36h9pZmYtzQlXDWjJ5KuIuJe0OlKzSFo5It5pgaqsQ5rbfGkLXMvMzJqg2/R8sx6SrpI0V9J9OZ1qYKXEqKKy5KujclrVZGCXwjFfkPSYpOmS/iTpQ3mU8zM5tIP8/c+l7xXK6SvpgVyXsZI2ydtLSVmPAedXOXeEpGsrJVdJOlXSnPyntJTguUC/3PO+oML1nHBlZtZKulvj2x+4JCK2Bl
4DvkztxKjlSNoAOJvU6O7KsrhGSFOVPpkTqW4FvhsR75ICPErr+H4amJlHU1fya+D6XJebgIsK+zYGdo6IU2v8vvclV0kaRFrW8BOk8JBjJG1PGqH9l5x8dXr5hZxwZWbWerpb4zs/Imbkz1OBftROjCr3CWB8RLycoxuLU5A2Bu5VWp/3dGDrvP1alkVDfh24rsb1dwJuzp9vYPnUrdsiota6wVA5uWpX4M6IeCMiFpFSsHardZFyDtkwM2tZ3a3xLU92WqcFr/1r0sL025BCOFYHiIjnSCEce5F6pHc38/pvNHBMk5KrzMysfXS3xrdcUxOjHgN2l/SBvBhBccGF3sDz+fPXys67mvT4uV7v9WHgK/nz4cDE+j+hronAAUqrNq1JSuCaiJOvzMzajXtGqaG8XNIapIUPjqp2YES8KGkEaZWh14AZhd0jgNskvQo8AGxa2Dea9Li51iNngJOA6ySdDrxcqy6NiohpOexjct50dURMB5A0KU+/urvSe18zM2sd3TLhqq3lkdK/jIgmvWttbXnk85URsbjWcU64MjNruloJV+75tjJJZwDHs2zEc/n+lpq32xzfJj0Or9n4lidcefCVmdmK6bTvfPOc2Kck3STpSUm35/eaZ0p6PM9pvVKS8vEnS3oiz6G9NW/bvZAyNV3SWnn76fkasySdXSjvyfJ5wnnfkHzsDEkXlJK0JPUAPkAaeXyppG/m7VdLWiRpIbAon/eDCr/xyHzdmZJuyNtOk/QfSW/mv+dKukSFRK583KL89x557u/thfulPA94Q2CcpHGt81/JzMwq6bSNb7Y5cGlEbAm8TkpsujgihuT4yJ7AfvnYM4Dt8xza4/K204ATImIgafrNm5L2Ic0H3pG0atAgSaXpR5XmCUN6l/vNfJ3igKqjgYURMQQYQppjuymptylgYESsnufa/rT4wyRtDfwQ2CsitgNOybt2B06MiJ552zMRcUKd+7Q9qZe7FWlZw10i4iLgBWDPiNiz/ASHbJiZtZ7O3vg+FxGT8ucbSXNa91RKmpoN7MWy+bazgJskHQGUHvNOAi7MvcB18uPfffKf6cA0UnBF/3x8+TzhvpLWAdaKiEfy9tI8XfJ1jpQ0gzRS+gOFa02OiPk1fttepNHRrwBExL/z9lpzgauZHBF/z6EfM4C+9U5wyIaZWevp7O98y0eLBSmreHBEPJdHJq+e932eFKDxBeAHkraJiHMljQE+B0yStC+pR/rziLiieGGlZQjL59H2rFM/ASflPOfitfagsXm7TfEO+R9TklYCVi3s8/xfM7MOpLP3fDeRtFP+fBgp4hHgFUm9gNKqRCsBH4mIccD3SHNye0nqFxGzI+I84HFSL/de4Ov5fCRtJOmD1SoQEa8B/5H0ibzpK4Xd9wLH5znBSPp4nmvbiAeAgyR9IJ+7Xt5ebS7wAmBQ/rw/sEoDZTQ017eUcFX6Y2ZmK6az94DmASdIuhZ4ArgMWBeYA/yD1KAC9ABulNSb1Bu9KCJek/RjSXsC7wJzSfNdl0jaEngkj9VaBBzB8u9yyx0NXCXpXVJIR+kl6dWkR7zT8sCvl4EDGvlhETFX0k+BByUtJT0GH071ucBXAb+XNBO4h8Z61lcC90h6odJ7XzMzax2ddp5vfgx8V711eduCpF45N7k0tWiDiDilzmmdhuf5mpk1nef5tr7PS/of0v38K6mHamZmVlGn7fl2Ffmd7tgKuz4VEf9q4PwNSY/Rh9U7trlW26B/bPC1X7333e99zczqc8+3A8sN7MAVOP8F8sAyMzPrHDr7aOduRdK5kk4ofB+RE69KiVpbS5qcE7NmSeqvZUlgIyU9nROuPq20qMIzknZsv19kZtY9ufHtXEYBBxe+H0wK7yg5Dvi/nLQ1GPh73v4x4H9JU6m2IE3L2pWU8PX9SgU54crMrPW48e1E8lKAH5S0oaTtgFeB5wqHPAJ8X9L3gI9GxJt5+/w8n7k0pWpspJf9s6mSduWEKzOz1u
N3vp3PbaR3vB8m9YTfExE3S3qMlOb1x7yQw7Msn3D1buH7uzTwv4FtNurNFA+yMjNrMW58O59RpECNPqRFFlYr7ZC0GfBsRFwkaRNgW1Lja2ZmHYgfO3cyETGXFAn5fES8WLb7YGBOXshhAPCbtq6fmZnV53m+VpcTrszMmq7WPF/3fM3MzNpYu7/zlXQ1cGFEPFFl/whgUUT8ok0r1kySFkVErxW8xgWkZQ7/GBGnt0zNmm/28wvpe8aY5bY55crMrPnavfGNiG+0dx06oGOB9SKi1kpKLUrSyhHxTluVZ2bWnbXZY+dC0tJNkp6UdLukNSSNlzQ4H/MZSdMkzZT0vrxjScdIultST0mLCtuHSRqZP4+UdHkOiHha0n416jRG0rb583RJZ+bP50g6Jn8+XdLjOTHq7MK5RxTSpK6Q1KPs2n0kPSKpYhdRyQWS5kiaLemQvH000AuYWtpWdt5akuYX1gheu/RdUj9J90iaKmmipC3yMV+Q9Fj+jX+S9KG8fYSkGyRNAm4oK8chG2ZmraSt3/luDlwaEVsCrwPfKu2QtD5pCs2XI2I74KDiiZJOBPYDDiiER1TTF9iRNN/1ckmrVzluIrCb0jq/7wC75O27ARMk7QP0z9caCAySNFRpvd9DgF1ymtRS0sL2pbp+CBgDnBkRyz+vXeZL+ZrbAZ8GLpC0QUTsD7wZEQMjYlT5SRHxH2B8/m0AXwHuiIj/ktbnPSkiBpHSqy7NxzwEfDIitgduBb5buORWwKcj4tCychyyYWbWStr6sfNzETEpf74ROLmw75PAhIiYDxAR/y7sO5KU5HRAbmTq+W1Oc3pG0rOkSMUZFY6bmOswn9RY7i1pDWDTiJiXe7/7kBayh9Qj7U+aPzsIeFwSQE/gpXzMKqRVik6IiAdr1HFX4Jb8aPmfkh4EhgCjG/h9V5Ma0N8BRwHHSOoF7AzclusEy+YAbwyMkrQBsGr+vSWjG/jHjJmZtaC2bnzL5zU1Os9pNqmXuDHLGo7iueU920bLeZyUgfwscD8puOIYYGreL+DnEXFF8SRJJwHXR8T/VLjmO/n8fYFajW+zRcSk/Bh/D6BHRMyRtDbwWu6Jl/s1aVDb6HzOiMK+N+qV54QrM7OW1daPnTeRtFP+fBjpcWjJo8BQSZsCSFqvsG868E1gtNL6tZB6i1tKWgk4sKycgyStJKkfsBkwr1JlIuJtUo/6IFIu8kTS49oJ+ZB7ga/nXiWSNpL0QVLPdlj+jKT1JH20dFng68AWOWO5monAIZJ65EfuQ4HJNY4v9xvgZuC6/FteB+ZLOijXSUr5zwC9gefz5681oQwzM2sFbd34zgNOkPQksC5wWWlHRLxMGuV7h6SZvD+3+CFSwzhGUh/gDOAu4GGgPOnpb6SG7G7guIh4q0adJgIv5UevE0m964m5zPtIDdwjkmYDtwNr5WlRPwTukzSL1GveoFDXpcChwF6SvkVldwKzgJnAA8B3I+IfNepZ7ibSPbylsO1w4Oh8/+YCX8zbR5AeR08FXmlCGWZm1graLOFKUl/grogY0MrljMzl3N6a5bQ3ScOAL0bEV1u7LCdcmZk1nWokXLX7PN/ORE0M0MjvV9+OiIfz9wOAp6sFijThur8GPksK4ijfN5L8jw/VDzAZDtwXES/UKq9SyEYlDt4wM2tMmzW+EbGAFPbf2uUML98maV/gvLLN8yOi/F1xS9sD6CWpNOVnE2ChpJci4hP1Tpb0A8qmXAG3RcRJjRTeQIDJcGAOULPxNTOzltUle775Efc9pEFcO5NGNZ8GnA18kDwnV9IjpJHSbwJH5elFw4H9gTWAfsCdEfHdwrV/Sppv/Cbpse8/JX2B9A54VeBf+fo9geNIc4BfBk4hvecFWC0PBtuL9J57VeDPwFcjYnHuvb5OGom9Nul9cMXH6Erzin4N7E0aPPZ2Yd/4/LunA9fk6wVwbT52MHCTpDeBnTzlyMysbXTlhRU+BvwvaY7vFqTR1buSGq
PvA08Bu+XgiTOBnxXOHUgK0diGNCL5I3n7msCjOQRkAmlaElQIscg9/cuBX+bAjAdJc3hPz9//QgrHGJKv9yRwdKEOG+T67gecW+N3HkgKL9mKNB965wrHDAQ2iogBEbENcF1uzKcAh+f6LNfwOuHKzKz1dMmebzY/ImYDSJoLjI2IyKOW+5Km31wvqT+pN7hK4dyxEbEwn/sE8FGW9SrvysdMJfU2oXaIRS0DJP0EWIcU4HFvYd/vclDIE6U4yCqGsiys4wVJD1Q45llgs/yueAxwX72KRcSVpMQsVtugv9edNDNrQV258V1S+Pxu4fu7pN/9Y2BcRByYH1OPr3LuUpbdp//GsuHhxe21QixqGUlK7ZqZH3fvUaUOYgVExKt5zu++pEfhB5PmIjfEIRtmZi2rKz92rqcYPDG8Ba9VDLH4D7BWje9rAS/mRRIOp3kmsCysYwNgz/ID8rzolSLi/5HeTe9QpT5mZtYGunPjez7wc0nTWfEnACOoHGLxB+BApZWPdiO9Dz49ry7UD/gR8BgwifQOujnuBJ4BniClXj1S4ZiNgPGSZpAytUuxmCNJC0/MkNSzmeWbmVkTtVnIhnVeDtkwM2u6WiEb3bnna2Zm1i668oCrTqNecpakdUhTpPYr27WkWlhHtaSrvPDCOcA/ImJPSbcAW5OmH/2y0rUaTbgqctqVmVl1bnw7h3VIo6KblRBWlnR1NHBMRDwk6cPAkIj4WEtU0szMGuPHzh2IpF6SxkqaJmm2pNKqROcC/fLAqAuqnCtJF0uaJ+lPpCSv0r7xkgZLOpMU3HFNvs59wEaFAWHF6zlkw8yslbjn27G8BRwYEa/n6UGPShpNWj5xQEQMrHFuMenqQ6TRz9cWD4iIcyTtBZwWEVMkXUJ6NP2+6zpkw8ys9bjx7VgE/EzSUFIYyEakhrQRjSRdNYtDNszMWpYb347lcGB9YFBE/FfSAtLCD2Zm1oX4nW/H0ht4KTe8e5IypaGxJKq6SVdmZtYxuPHtWG4CBufFH44kp15FxL+ASZLmVBtwRWNJV2Zm1gH4sXMHUJrjGxGvADuV78/zfB+KiMNqXCOAE6vs26PK5wVAs6YvmZlZ87nx7RzWAb4FXNroCZJ65MFXK6w5IRvgoA0zs2r82LlzKM7znSNpYf48Q9LLkv4CIGmBpPMkTQMOyt/PLswb3iIft3vh/OmSvLKRmVkbcuPbOZwB/CXPxz0RmBgRA/P3UaS1iUv+FRE7RMSt+fsrEbEDcBkpopL89wn5/N2AN9vkV5iZGeDGtysaVfb9jvz3VKBv/jwJuFDSycA6EfFO+UWccGVm1nrc+HY+77D8f7fyecBvlH1fkv9eSn7HHxHnAt8AepJGUW9RXkhEXBkRgyNicI81erdIxc3MLPGAq86hOM/3r8BWklYjNZ6fAh5qysUk9YuI2cBsSUOALcjTmipxwpWZWcty49sJRMS/JE2SNAe4G/gtMAeYD0xvxiW/nUM83gXm5muamVkbUZoealbd4MGDY8qUKe1dDTOzTkXS1IgYXGmf3/mamZm1MTe+ZmZmbaxDv/OVdDVwYUQ8UWX/CGBRRPyiDeoynrwObiuXcxBwDvCPiOgQiyM0N+EKnHJlZlZJh258I+Ib7V2HdnA0cExENGkE84qQtHKlub5mZtY6OsRjZ0l9JT0l6SZJT0q6XdIaksZLGpyP+UyOSZwpaWyFaxwj6W5JPSUtKmwfJmlk/jxS0uU5POJpSfvVqFNPSbfm+txJmtZT2ndZvsZcSWfnbXtJ+l3hmL3zedWuf2iOfJwj6by87UxgV+CaaqsXSZogaWDh+0OStpO0pqRrJU3OkZFfLNzbifneTZO0c96+R94+mrQSUnk5DtkwM2slHannuzlwdERMknQtaSEBACStD1wFDI2I+ZLWK54o6URgb+CAiFgiqVY5fYEdgX7AOEkfi4i3Khx3PLA4IraUtC0wrbDvBxHxb0k9gLF5/zjgUknrR8TLwFHAtZ
UqIGlD4DxgEPAqcJ+kAyLiHEl7Ufvx9jXAcNJ0oY8Dq0fETEk/Ax6IiK/nVZAmS/oT8BKwd0S8Jak/cAtQGn23AzAgIuaXFxIRVwJXAqy2QX8PiTcza0EdqfF9LiIm5c83AicX9n0SmFBqJCLi34V9RwLPkRre/zZQzm8j4l3gGUnPkgImZlQ4bihwUS5vlqRZhX0HSzqWdP82ALbKx9wAHCHpOtLSgEdWqcMQYHxupJF0Uy7vd1WOL7oN+JGk04GvAyPz9n2A/SWV8ptXBzYBXgAuzr3lpcDHC9eaXKnhLeeQDTOzltWRGt/y3lWjva3ZwEBgY1LoRPm55fGLzS0HAEmbkhYmGBIRr+ZH2qUyrgP+ALwF3NYa71EjYrGk+4EvAgeTes8AAr4cEfPK6jsC+CewHek1Q7GXXx5FaWZmbaBDvPPNNpFUWkj+MJaPTHwUGJobPsoeO08HvgmMzo9zAf4paUtJKwEHlpVzkKSVJPUDNgPmUdmEXA8kDQC2zdvXJjVaCyV9CPhs6YSIeIHU0/whqSGuZjKwu6Q++dH1ocCDNY4vdzWpV/54RLyat90LnKT8zF3S9nl7b+DF3Nv/KtCjCeWYmVkr6EiN7zzgBElPAuuSlsADID+ePRa4Q9JMylbuySODTwPGSOpDWoLvLuBh4MWycv5GavzuBo6r8r6XXH6vXJ9zSKsCEREzSQ3+U8DNpBWCim4iPUJ/stoPjYgXcx3HATOBqRHx+2rHVzh/KvA6yzfwPwZWAWZJmsuyZQYvBb6W79sWuLdrZtbuOkS8pKS+wF0RMaCVyxmZy7m9Fcu4GJgeEde0YhkbAuOBLXKPtlU5XtLMrOlqxUt2pHe+nZ6kqaSe5XcK2xZFRK8WLONI4KfAqbUaXkkLgMER8YqkhyOiNMXoAuBzwB+B80lPCFYFTo6IiZWutSIhG+CgDTOzch2i8Y2IBUCr9npzOcPLt0nalzTtp2h+RJS/K27k+oMqbO4pqXw09Vfzkn5V1anXb5pYr50LX48F1ouIpZK+AszupmEmZmbtpkM0vu0pIu4lDVZqLW9GxMA8EOp80gCtmyX9JCJGSdoDGAG8QvoHyFTgiIi4Nw/GupDUm55EGiBWkaQPkObwbgQ8QrvfZOsAABtKSURBVBr9XNq3KCJ65UCNXsBUSbcAJ5D+cTAY2Cki3iyccyypoabH2uu3zJ0wMzPAjW9b+hJpStR2QB/gcUkT8r7tga1JI6UnAbtImgJcwbJgkVvqXP8s4KEc1PF5UkzlciJi/9wQDwSQ9E/So+kTKxzrkA0zs1bSkUY7d3W7ArdExNKI+CdpatGQvG9yRPw9v8OdQUrh2gJ4thCCUa/xHUoKJyEixpCSs8zMrANyz7djWFL4vJQO9t/FCVdmZi3LPd+2MxE4RFKPnFU9lDTfuJp5wGZ5GhbAIXWuXwwF+SxprrSZmXVAHaqH1cXdScp7nkmKtPxuRPxD0haVDo6INyV9C7hH0hvA43WufzZwSw7YeJgUJmJmZh1QhwjZsMok9YqIRXmk9CXAMxHxy7auh0M2zMyarlbIhh87d2zH5DnCc0kZzVe0c33MzKwFdMvHzpKuBi6MiPctIp/3jwAWRcQvWqHsPUjr9e5X45iBwIa5l/tLSfuTli1cLOki0trFxUFakyLihDrljqf2OsFVrWjCVZHTrszMumnj2wkSnQaSFrz/I0BEjAZG531rAz9qzXxqMzNrXV36sbOkvpKeknSTpCcl3S5pDUnjc6oTkj4jaZqkmZLGVrjGMZLultRT0qLC9mF5oQYkjZR0uaQpkp6WVLVXW3btHSU9Imm6pIclbS5pVdIqSodImiHpEEnDJV0saWdgf+CCvK9f2W/pkzOdyfW9Nf/uO4GehXL3yeVOk3SbpPdlT0s6Nv+eKUsXL2z4npuZWX1duvHNNgcujYgtScvwfau0I0/5uYq0CP12wEHFEyWdCOwHHFCMXqyiL7Aj8HngckmrN1C3p4DdImJ74E
zgZxHxdv48KiIGRsR7yydGxMOkHvDped9falz7eGBx/t1nAYPyb+pDWm/40xGxAzAFOLX85Ii4MiIGR8TgHmv0buCnmJlZo7rDY+fnIqK05u6NwMmFfZ8EJpRSpCLi34V9RwLPkRre/zZQzm9zQtUzkp4lJVSVL6hQrjdwvaT+pOlHqzRQTqOGAhcBRMQsSbPy9k8CWwGT0iBqViVlQVflkA0zs5bVHRrf8rlUjc6tmk1697oxUIp4LJ5b3rNtTjk/BsZFxIE5TGN8g3UreodlTzAa6W0LuD8iDm1GWWZm1gK6w2PnTSTtlD8fBjxU2PcoMFTSpgCS1ivsmw58ExidF68H+KekLSWtBJQvOXiQpJUk9SOtPjSvgbr1Bp7Pn4cXtv8HWKvKOeX7FpAfKQPDCtuLiVcDgG3z9kdJCzd8LO9bU9LHG6irmZm1kO7Q+M4DTpD0JCly8bLSjoh4mbRs3h2SZgKjiidGxEPAacCY/K70DNLi8w8DL5aV8zdSXOTdwHER8VYDdTsf+Lmk6Sz/FGIcsFVpwFXZObcCp+dBWv2AXwDH52v0KRx3GdAr/+5zSEsVln7zcFIa1izSI+eKKVtmZtY6unTCVX6Ue1dEDGjlckbmcrrk9B8nXJmZNV2thKtmv/Ntz6CKCmWNp5kBEk0s5yBSL/IfEbFnhf2lcIw/5u8jaKN70BRN/cdCS4ZsgIM2zMya3fh2gqAKImIB0JK93qOBY/Lj6GI5wwEkDSeHY0jaFzgOWCrpiHzo/Igof1dsZmbdTN13vh0xqKJOgMRl+RpzJZ2dt+0l6XeFY/bO51W7/qGSZkuaI+m8vO1MYFfgGkkXVDhnuXAMYB3gcuBe4DVSMtW4wvFHSJqc3+teIalHlbocJOnC/PmUPI0JSZtJmpQ/D5L0oKSpku6VtEHe3k/SPXn7RFVYQUnSj/O971G23SEbZmatpNEBVx0tqKJigET2g/yMfVtgd0nbkhq9LXJdAY4Crq104Tyy+TxgL9JUoyGSDoiIc0iBFIdHxOnl59UIx9gC2Df/rrMkrSJpS9L6vLtExEBgKXB4ld86Edgtf94N+JekjfLnCZJWAX4NDIuIQfl3/TQffyVwUt5+GnBp2W+9AFgfOCoilpb9HodsmJm1kkYfO3e0oIpqARIAB0s6lvTbNiAtSDBL0g3AEZKuI62re2SVOgwBxudRwUi6KZf3uyrH1zMmIpYASyS9BHwI+BTpHwyP56CLnsBLlU7Oa/72krQW8BHg5lyf3YA7SP8wGgDcn6/VA3hRKTJyZ+C2vB1gtcKlfwQ8FhHHNvN3mZlZMzXa+HbkoIr3KM3XPQ0YEhGv5kfapTKuA/4AvAXcFhHvNOXaK6C4+tBS0j0XcH1E/E+D13iY1FufR+oJf530D4jvAJsAcyNip+IJktYGXss960oeBwZJWq/sH0zv44QrM7OW1ehj544WVFEtQGJt4A1goaQPAZ8tnRARLwAvkHKNr6vxWyeTHlf3ye9BDwUerHF8Ua1wjKKxwDBJH8y/YT1JH61x/ETSPyomkO7pnsCSiFhIukfrl/775MfaW0fE68D8PEIbJdsVrnkPcC5pDnMjdTYzsxbSaOPb0YIqqgVIzCQ1Tk+RHs9OKjvvJtIj9Cer/dCIeDHXcRwwE5gaEb+vdnyZWuEYxTKeIP0j4L78yPx+0iPyaiaSHjlPyO9mnyP/Ayi/ax4GnJfv/wzS42ZI75GPztvnAl8sq8dtpPf1oyX1xMzM2kTdkA11oaAKSRcD0yPimtYqoytyyIaZWdOpNUI2OhtJU0mPpL/T3nXpbFo6ZKPEYRtm1l3VbXxbIaiiWjnDy7cpBVWcV7b5vaAKNSFBKk+3Kb/+Yyw/AhjgqxExO+9fn/SIfFXSCO8Pkx5zv837B4PNB/6PlLRVdY5yLdXqQ5q+NSEi/iTp28CVEbG4iddeFBG9mlMvMzNrWR265xsR95JCKhomaeVGRzJHxCfqHPIpYHYpzUvSPVRIuC
qUvUdT6tqE+swufP42abpXkxpfMzPrODrdqkaSfqCUgPUQaY4rSmlbv5I0BThF0qeUVv2ZLelaSavl4xZIOj9vn6xly+r1lfSApFmSxkraRCmn+Xzgi3kA1VnUSLiqUM/1JP0uX/PRHPaBpBG5TuMlPSvp5MI5P5I0T9JDkm6RdFrePlIpDexkYENgnKRxeV+1xLBNJT2Sf+tPyup2uqTHc93OrlJ/J1yZmbWSTtX4ShoEfIU0d/hzpECMklXzi+1LgJHAIRGxDal3f3zhuIV5+8XAr/K2X5Pm3W5LGhF9UUTMYPnEqrOpkXBVwdmkwV3bAt8HflPYVyn1agjwZWA70hSp972kj4iLSNOl9qy0sEOZ/wMuy7/1vVHlkvYB+ueyB5Lm+g6tUJYTrszMWkmHfuxcwW7AnaX3nZJGF/aVpjhtTnov/HT+fj1wAssa2lsKf/8yf94J+FL+fAOpx7uidiU1pkTEA5I+kIMvoHLq1S7A7/P0qrck/WEFy9+lVD7pN5Xene+T/0zP33uRGuMJ1S7kkA0zs5bV2RrfWt5o8Lio8rktVUq9aq6mJIZBStf6eURcsQJlmpnZCuhUj51JvbMDlFY1Wgv4QoVj5gF9S+9zSaOFiwlVhxT+fiR/fpj0OBtSMMXEFqjrxHyt0kCsV3LqVDWTgC9IWl0pl7naiOnyFK1qiWGTWP43ldwLfD2XgaSNSklbZmbWNjpVzzcipkkaRUqeeomUT1x+zFuSjiItKLByPubywiHr5lSpJaToSICTgOsknQ68TMpRXlEjgGtzWYuBr9U6OCIez4/RZwH/JI1wrjTS6UrgHkkv5Pe+pcSwl0nvpEvTiU4Bbpb0PeC9hK6IuE9pVaVHlBZcWAQcQZWFHczMrOXVTbjqSiQtAAZHxCvtXZdKJPWKiEWS1iD18o+NiGntXS8nXJmZNV23SriSdDVwYc5PruS7kl5qJJijGWXvwQqEbABXStqK9O72+lLDK+kc2jFkwwlXZmYtq8s1vqVAjCr7+uZUrBVSLXmLNL2n2SLisCrbzyx8dciGmVkn19kGXL0nB2M8JekmSU9Kul3SGjm8YnA+5jOSpkmaKWlshWscI+nuPICrWljFSEmX58CJpyXtFxH35rm/xT8Hll3bIRtmZlZRp218s82BSyNiS+B14FulHUq5zFcBX46I7Uj5yBT2n0gaUXxARLxZp5y+pFCKzwOXSyqf0lOJQzbMzKyizt74PhcRpTV7byQFW5R8kvSedD5ARPy7sO9IUgM3LIdd1PPbiHg3Ip4BniU1nvXsSgq3ICIeAN4XspEHfr0vZCMi/gO0RMhGKVDkhsL2YsjGtPxb+q9gWWZm1gSd/Z1v+VDtRoduzyb1+jYmvastP7deWMWKDhHvVCEbTrgyM2tZnb3nu4mknfLnw4DiakOPAkMlbQrpHWxh33Tgm8BoSRvmbdXCKgAOkrSSpH7AZqQgj3ocsmFmZhV19sZ3HnCCpCeBdYHLSjsi4mXgWOAOSTNZlv1c2v8QcBowRlIfloVVPEzhHWn2N2AycDdwXM5frmcE6X3qLOBcGgjZAEohG3dTP2RjXP5erd6nkO7NbGCjQjn3ATeTQjZmA7ezfGNuZmatrNOGbEjqC9wVEQNauZyRuZzbW7OcXJZDNszMuohuFbJRLs/rXdQaoRr5+uuTep6rAicDHwbOAf5RaURynSCOUshGb+DBQsjG/sBWEXGupAOAp2uEiFSr5/hcbpNb0dYK2ajFARxm1pV12sY3IhYAzer1Slo5It5psJzhFc4vhmysQ3psOyEiJkq6BzgmP9ZuklLIhqThFKYaRcRo0iNpgANIjX2TGl8zM+s4Ovs734ok/SAHYjxEmgtMDrX4laQpwCmSPiVpeg6huFbSavm4BZLOz9snK6+OlEM9HsjvcL8L7A8MJ93Dd4FNJZ1FmmJ0jaQLGqjnjjkIY7qkhyVtLmlVUs/5EEkzJB0iabikiyXtnMu9IO/rp+VDRfoo5V
ejFBxyq1IAyZ1Az0K5++Ryp0m6rTT4yszM2kaXa3wlDSKN8h0IfA4YUti9an7+fgkwEjgkh1CsDBxfOG5h3n4x8Ku87dekvOVtgZuAiyJiBnAmMCqnXJ1NWlno8Ig4vYHqPgXsFhHb5+v8LCLeLrvmewPFIuJhUg/49LzvLzWufTywOAeQnAUMyvenD/BD4NMRsUOu76nlJzvhysys9XS5xhfYDbgzIhbnqT2jC/tKDdnmwPyIeDp/vx4opjzdUvi7NJVpJ9IoYUihFcVAj+bqTVr6cA7wS2DrFrhmyVBS8AgRMYs0ihpS+MhWwCRJM0ijsD9afrITrszMWk+nfefbTG80eFxU+dzSfgyMi4gD8+jt8c24xjss+0dUI7GXAu6PiEPrHpk5ZMPMrGV1xZ7vBOCA/M5zLeALFY6ZB/Qtvc8Fvgo8WNh/SOHvR/Lnh1k+tGJiC9S1N/B8/jy8sL08SIMa+xaQHykDwwrbJ5CCR5A0ANg2b38U2KXwLntNSR9vXvXNzKw5ulzjm6fnjAJmksIqHq9wzFvAUaRHvrNJA6YuLxyybh5YdQrw/+VtJwFH5e1fzftW1PnAzyVNZ/mnEOOArUoDrsrOuRU4PQ/S6gf8Ajg+X6NP4bjLgF45gOQcYCq8Fz4yHLgl/5ZHaCyr2szMWkinDdloLXm08OC86IHhkA0zs+aoFbLR5Xq+ZmZmHV13G3BVV0T0rXeMpKuBC6ulTOVUrQ1Ja+YWzY+I8kUbmqQUwBERJ67IdZqiPRKuWpPTs8ysvbnxbYaI+EYDhz0dEce2emVaSFNSv8zMbMX4sXMNOdXqKUk35aSo2yWtUZYq9ZmcFDVT0tgK1zhG0t159PWiwvZhedEGJI2UdHkOtXhaUrXlBEs2lHSPpGcknV+45qE5mWuOpPMK2+uV+xhp8Fex3g7ZMDNrJe751rc5cHRETJJ0LfCt0g6lRRWuAoZGxHwtv2Ywkk4E9gYOiIglkmqV05f0mLofME7Sx2osXTgQ2B5YAsyT9GtgKSlvehDwKnCfpAMi4nd1ft/GwM4RsbS4MSKuJC1fyGob9PeoPDOzFuSeb33PRcSk/PlGlk+2+iRpQYX5ABHx78K+I4HPAsMiYkkD5fw2It6NiGeAZ6k9/WdsRCzMjfMTpISqIcD4iHg5Pz6+ieVTu6q5rbzhNTOz1uWeb33lvb5Ge4GzST3UjYH5Fc4tT6NqSjnFxnwp9f871iq3buqXE67MzFqWe771bSKplO98GFBcKvBRYKikTQHKHjtPB74JjJa0Yd72T0lbSloJKB/1fJCklXJwxmakFK6mmAzsnlc26gEcyrLUrlrlmplZG3PjW9884IScFLUuKTkKeC8t6ljgDkkzWbZwQ2n/Q8BpwJi8mtAZpLV4HwZeLCvnb6QG9G7guBrveyuKiBfz9ceR0r2mRsTv8+5a5ZqZWRtzwlUNebGDuyJiQCuXMzKXc3trltNcTrgyM2u6WglXfufbDJIWRUSv/Dj5oogYVuGY8cBpEdGmrVaxXEl/BA6LiNeqHPtt4MqIWFzrml0tZMO6LgeoWGfhxreGiFgAVO31RsQLLL+SUHPLGV6+TdK+pKlDRU1KyIqIz9U55NukEdw1G18zM2tZ3e6dr6QjJE3OKwZdIalHjRCKD0m6MwdozJS0c9m1+kqakz/3lHRrDuO4E+hZOG4fSY/kMI7bJPXK28+U9HgOxbhSeSJw7r3uBbwNrAGcFBEDKzW8dcpdkAdgrSlpTP4NcyQdIulkUgTmOEnjWubumplZI7pV4ytpS9IavbtExEDSNJ3Da5xyEfBgRGwH7ADMrXHs8cDiiNgSOIu8xm4eaPVD4NMRsQMwBTg1n3NxRAzJ75R7AsVkq5UjYkdS7/SsppZb5jPACxGxXS7rnoi4CHgB2DMi9iw/wQlXZmatp7s9dv4UqXF6PHcyewIv1Th+L1JYBjmIolYrNJTUWBMRs5TWyoUUxLEVMCmXuSppDV2APSV9l9S7XY
/UuP8h77sj/z2VlH7V1HKLZgP/myMn74qIiTWuR76WE67MzFpJd2t8BVwfEf+z3EbpO4Wv5SEULVHm/RFxaFmZqwOXklYoek5pJaRi2aUgjUZCNGqKiKcl7QB8DviJpLERcU6j5ztkw8ysZXWrx87AWGCYpA9CCsWQ9FGqh1CMJT3WJb8b7l3j2hNIIRxIGgBsm7c/Cuwi6WN535qSPs6yhvaV/A64uQO3qpX7njwqe3FE3AhcQHqEDvAfYK1mlmtmZs3UrRrfvP7uD0mLDswC7gc2oHoIxSmkR8OzSY9/t6px+cuAXjmM45x8fCmIYzhwSy7zEWCLPP3nKmAOcC/weDN/VsVyy2wDTJY0g/Re+Cd5+5XAPR5wZWbWthyyYXU5ZMPMrOlqhWx0q56vmZlZR9CtBly1VVxknTpUTcWqc15DoRuS9iAlXO0naX9gq4g4t8o1BwIbRsQfa5XthCsz645aMzGtWzW+bUnSynld3eU0NxUrIu4lvRtuyjmjgdE1DhkIDAZqNr5mZtayuuNj5x6SrpI0V9J9OSHqmJw0NVPS/5O0hqTekv6aR0CXRik/J2kVSf0k3SNpqqSJkrbIx4yUdLmkx4DzJe2ek7RmSJouaa2yVKzhku7I13pG0vmlSlZLxapE0mckPSVpGvClwvbhki7Onw/K6VYzJU2QtCppgNYhuX6HlF3TIRtmZq2kOza+/YFLImJr4DXgy8AdOWlqO+BJ4OiIWAjMAHbP5+0H3BsR/yWNEj4pIgaRlgy8tHD9jYGdI+LUvO+EnKa1G/BmhfoMJKVubUNqCD9SJxVrOXm+8FXAF0gBIh+u8rvPBPbNv3H/iHg7bxuVoyvLl0O8MiIGR8TgHmvUmmFlZmZN1R0fO8+PiBn5cyk9aoCknwDrAL1Y9nh3FKlhHAd8Bbg090B3Bm7LiVUAqxWuf1tOwwKYBFwo6SZSA//3wjklY3NDj6QngI/melRLxSq3Rf5Nz+Rr3EhaY7jcJGCkpN+yLD2rIQ7ZMDNrWd2x8V1S+LyUFDE5EjggImZKGg7skfePBn4maT1Sr/IBYE3gtdybreSN0oeIOFfSGFKy1KQ8aOqtOvVZmSqpWCsiIo6T9Ang88BUSZUyoM3MrA10x8fOlawFvChpFQoLLUTEIlL4xf+RRkkvjYjXgfmSDgJQsl2li0rqFxGzI+K8fJ0tGqxPtVSsSp4C+krql79XbLBzXR6LiDOBl4GP4IQrM7N20R17vpX8CHiM1Cg9xvIN0ijgNpb1hiE10JdJ+iGwCnArMLPCdb8taU/gXdKiCXeTErVqioiXcw/8FkmlR9o/BJ6ucOxbko4FxkhaDEykcoN6gaT+pF712FzfvwFn5OSrn5e/9y2ZOnXqIknz6tXb6AO80t6V6OB8jxrj+1RfZ7hHH622wwlXVpekKdVSWmwZ36f6fI8a4/tUX2e/R37sbGZm1sb82LkTkXQnsGnZ5u/lAA4zM+sk3Ph2IuVRkm3oynYqt7PxfarP96gxvk/1dep75He+ZmZmbczvfM3MzNqYG18zM7M25sbX3pMXaJgn6c+SzqiwfzVJo/L+x/ISjd1OA/dpaF4Q4x1JTV7Bqito4B6dKukJSbMkjZVUdT5kV9bAfTpO0uy8+MlDkrZqj3q2p3r3qHDclyWFpE4x/ciNrwEgqQdwCfBZUq70oRX+D/1o4NWI+BjwS96/vnCX1+B9+hswHLi5bWvXMTR4j6YDgyNiW+B24Hy6mQbv080RsU2Osz0fuLCNq9muGrxHSFoLOIUUktQpuPG1kh2BP0fEs3nFo1uBL5Yd80Xg+vz5duBTqrBSRBdX9z5FxIKImEVKNuuOGrlH4yJicf76KGk1sO6mkfv0euHrmkB3GyHbyP9fAvgxqTNQnp3fYbnxtZKNgOcK3/+et1U8JiLeARYCH2iT2nUcjdyn7q6p9+hoUvRqd9PQfZJ0gqS/kHq+J7dR3TqKuvdI0g7ARyJiTFtWbEW58TWzdiPpCG
AwcEF716WjiohLIqIf8D1SxrtlklYiPYr/TnvXpanc+FrJ86SVjko2ztsqHiNpZaA38K82qV3H0ch96u4aukeSPg38ANg/IpaU7+8Gmvq/pVuBA1q1Rh1PvXu0FjAAGC9pAfBJYHRnGHTlxtdKHgf6S9pU0qrAV0jrGReNBr6WPw8DHojul9LSyH3q7ureI0nbA1eQGt6X2qGOHUEj96l/4evngWfasH4dQc17FBELI6JPRPSNiL6k8QP7R8SU9qlu49z4GvDeO9wTgXuBJ4HfRsRcSedI2j8fdg3wAUl/Bk4Fqg7776oauU+Shkj6O3AQcIWkue1X47bX4P+WLgB6AbflaTTd7h8wDd6nEyXNzct+nsqyf/x2Cw3eo07J8ZJmZmZtzD1fMzOzNubG18zMrI258TUzM2tjbnzNzMzamBtfMzOzNubG18zMrI258TUzM2tj/z8g3gfpB+SAfAAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light",
"tags": []
},
"output_type": "display_data"
}
],
"source": [
"feat_importances = pd.Series(regr.feature_importances_, index=X_temp.columns)\n",
"feat_importances.nlargest(len(X_temp.columns)).plot(kind='barh')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "_9P7iZMGxMjK"
},
"outputs": [],
"source": [
"import pickle\n",
"\n",
"# Persist the object held in `regr` to disk. The with-block closes the file\n",
"# handle (flushing buffered bytes) even if pickle.dump raises.\n",
"with open('random_forest2.pkl', 'wb') as f:\n",
"    pickle.dump(regr, f)"
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"collapsed_sections": [],
"name": "rf_new_data_80est.ipynb",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.10"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "uxa8uaiWhPuw"
},
"source": [
"# Read data into a dataframe"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "pYYleLjJhr_o"
},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import OneHotEncoder \n",
"import numpy as np\n",
"from sklearn.ensemble import RandomForestRegressor \n",
"from matplotlib import pyplot as plt "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "RP4jhT3Chr_u"
},
"outputs": [],
"source": [
"# !unzip 'train.zip'\n",
"df = pd.read_csv(\"train.csv\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "80n8bJeDhPuy"
},
"source": [
"# Outlier detection and removal"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 320
},
"id": "Cgw2yRgU5zBZ",
"outputId": "d3ddca62-9631-4ef8-c6b6-6052e5bec3f4"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>vendor_id</th>\n",
" <th>passenger_count</th>\n",
" <th>pickup_longitude</th>\n",
" <th>pickup_latitude</th>\n",
" <th>dropoff_longitude</th>\n",
" <th>dropoff_latitude</th>\n",
" <th>trip_duration</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1.458644e+06</td>\n",
" <td>1.458644e+06</td>\n",
" <td>1.458644e+06</td>\n",
" <td>1.458644e+06</td>\n",
" <td>1.458644e+06</td>\n",
" <td>1.458644e+06</td>\n",
" <td>1.458644e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>1.534950e+00</td>\n",
" <td>1.664530e+00</td>\n",
" <td>-7.397349e+01</td>\n",
" <td>4.075092e+01</td>\n",
" <td>-7.397342e+01</td>\n",
" <td>4.075180e+01</td>\n",
" <td>9.594923e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>4.987772e-01</td>\n",
" <td>1.314242e+00</td>\n",
" <td>7.090186e-02</td>\n",
" <td>3.288119e-02</td>\n",
" <td>7.064327e-02</td>\n",
" <td>3.589056e-02</td>\n",
" <td>5.237432e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>1.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>-1.219333e+02</td>\n",
" <td>3.435970e+01</td>\n",
" <td>-1.219333e+02</td>\n",
" <td>3.218114e+01</td>\n",
" <td>1.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>1.000000e+00</td>\n",
" <td>1.000000e+00</td>\n",
" <td>-7.399187e+01</td>\n",
" <td>4.073735e+01</td>\n",
" <td>-7.399133e+01</td>\n",
" <td>4.073588e+01</td>\n",
" <td>3.970000e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>2.000000e+00</td>\n",
" <td>1.000000e+00</td>\n",
" <td>-7.398174e+01</td>\n",
" <td>4.075410e+01</td>\n",
" <td>-7.397975e+01</td>\n",
" <td>4.075452e+01</td>\n",
" <td>6.620000e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>2.000000e+00</td>\n",
" <td>2.000000e+00</td>\n",
" <td>-7.396733e+01</td>\n",
" <td>4.076836e+01</td>\n",
" <td>-7.396301e+01</td>\n",
" <td>4.076981e+01</td>\n",
" <td>1.075000e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>2.000000e+00</td>\n",
" <td>9.000000e+00</td>\n",
" <td>-6.133553e+01</td>\n",
" <td>5.188108e+01</td>\n",
" <td>-6.133553e+01</td>\n",
" <td>4.392103e+01</td>\n",
" <td>3.526282e+06</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" vendor_id passenger_count ... dropoff_latitude trip_duration\n",
"count 1.458644e+06 1.458644e+06 ... 1.458644e+06 1.458644e+06\n",
"mean 1.534950e+00 1.664530e+00 ... 4.075180e+01 9.594923e+02\n",
"std 4.987772e-01 1.314242e+00 ... 3.589056e-02 5.237432e+03\n",
"min 1.000000e+00 0.000000e+00 ... 3.218114e+01 1.000000e+00\n",
"25% 1.000000e+00 1.000000e+00 ... 4.073588e+01 3.970000e+02\n",
"50% 2.000000e+00 1.000000e+00 ... 4.075452e+01 6.620000e+02\n",
"75% 2.000000e+00 2.000000e+00 ... 4.076981e+01 1.075000e+03\n",
"max 2.000000e+00 9.000000e+00 ... 4.392103e+01 3.526282e+06\n",
"\n",
"[8 rows x 7 columns]"
]
},
"execution_count": 3,
"metadata": {
"tags": []
},
"output_type": "execute_result"
}
],
"source": [
"df.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ymGBKkTlhPu1"
},
"source": [
"The maximum trip duration is 3526282 seconds (~41 days), which is implausible for a single trip.\n",
"\n",
"Clearly, there are some outliers in the data."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 415
},
"id": "uTuo_KlphPu1",
"outputId": "6bfff3b2-e094-4918-9319-6f1e1c54de49"
},
"outputs": [
{
"data": {
"text/plain": [
"{'boxes': [<matplotlib.lines.Line2D at 0x7f5e8a776a20>],\n",
" 'caps': [<matplotlib.lines.Line2D at 0x7f5e8a70e470>,\n",
" <matplotlib.lines.Line2D at 0x7f5e8a70e7f0>],\n",
" 'fliers': [<matplotlib.lines.Line2D at 0x7f5e8a70eef0>],\n",
" 'means': [],\n",
" 'medians': [<matplotlib.lines.Line2D at 0x7f5e8a70eb70>],\n",
" 'whiskers': [<matplotlib.lines.Line2D at 0x7f5e8a776d30>,\n",
" <matplotlib.lines.Line2D at 0x7f5e8a70e0f0>]}"
]
},
"execution_count": 4,
"metadata": {
"tags": []
},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAEDCAYAAAAlRP8qAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAP8ElEQVR4nO3df4ylVX3H8ffHYWRNbSVxp5HA4pqKdnBa/DGhWDcNizEBY+APsWXTqDQjW62gJraJ6SSoJPOHSaOJYDXbDhGMHbVozVaghsRJcKogAwEKjDYbGsMSEkZAkCiwS779Yy44jDN77+ze2TtzeL+SG57nnLPP8/1j8+HZ85xzb6oKSdLW97JBFyBJ6g8DXZIaYaBLUiMMdElqhIEuSY0w0CWpEQMN9CTXJHkkyb09jv/LJPcnuS/Jv210fZK0lWSQ69CT/AXwFHBdVY11GXs68C3g3Kp6PMkfVtUjx6NOSdoKBvqEXlW3AI8tb0vyR0n+K8kdSX6Y5I87XZcCX6qqxzt/1jCXpGU24xz6PuDyqnob8PfAP3fa3wC8Icl/J7k1yXkDq1CSNqETBl3AckleCfw58O9Jnm8+sfPfE4DTgXOAU4FbkvxJVf3yeNcpSZvRpgp0lv7F8MuqevMqfQeB26rqEPB/Sf6XpYC//XgWKEmb1aaacqmqJ1kK6/cBZMmZne7vsvR0TpLtLE3BPDCIOiVpMxr0ssUZ4MfAG5McTDIB/DUwkeRu4D7gws7w7wOPJrkfmAX+oaoeHUTdkrQZDXTZoiSpfzbVlIsk6egN7KXo9u3ba+fOnYO6vSRtSXfccccvqmpktb6BBfrOnTuZn58f1O0laUtK8vO1+pxykaRGGOiS1AgDXZIaYaBLUiMMdElqhIEuLTMzM8PY2BhDQ0OMjY0xMzMz6JKknm22L+eSBmZmZobJyUmmp6fZtWsXc3NzTExMALBnz54BVyd11/UJPcm2JD9Jcnfnp98+u8qYS5IsJrmr8/nQxpQrbZypqSmmp6fZvXs3w8PD7N69m+npaaampgZdmtSTrt/lkqUvJv+9qnoqyTAwB3y8qm5dNuYSYLyqLuv1xuPj4+XGIm0mQ0NDPP300wwPD7/QdujQIbZt28Zzzz03wMqk30pyR1WNr9bX9Qm9ljzVOR3ufPxGLzVndHSUubm5F7XNzc0xOjo6oIqk9enppWiSoSR3AY8AN1fVbasMe2+Se5Jcn2THGtfZm2Q+yfzi4uIxlC313+TkJBMTE8zOznLo0CFmZ2eZmJhgcnJy0KVJPVnX1+cmOQn4D5Z+8/PeZe2vBp6qqmeS/C3wV1V17pGu5ZSLNqOZmRmmpqZYWFhgdHSUyclJX4hqUznSlMu6vw89yRXAr6vqn9boHwIeq6pXHek6Brokrd8xzaEnGek8mZPkFcC7gJ+uGHPystMLgIWjL1eSdDR6WYd+MnBt58n7ZcC3qup7Sa4E5qtqP/CxJBcAh4HHgEs2qmBJ0uoG9hN0TrlI0vod05SLJGlrMNAlqREGuiQ1wkCXpEYY6JLUCANdkhphoEtSIwx0SWqEgS5JjTDQJakRBrokNcJAl6RGGOiS1AgDXZIaYaBLUiMMdElqhIEuSY0w0CWpEQa6JDWia6An2ZbkJ0nuTnJfks+uMubEJN9MciDJbUl2bkSxkqS19fKE/gxwblWdCbwZOC/J2SvGTACPV9XrgS8An+tvmZKkbroGei15qnM63PnUimEXAtd2jq8H3pkkfatSktRVT3PoSYaS3AU8AtxcVbetGHIK8CBAVR0GngBevcp19iaZTzK/uLh4bJVLkl6kp0Cvqueq6s3AqcBZScaO5mZVta+qxqtqfGRk5GguIUlaw7pWuVTVL4FZ4LwVXQ8BOwCSnAC8Cni0HwVKknrTyyqXkSQndY5fAbwL+OmKYfuBD3aOLwJ+UFUr59klSRvohB7GnAxcm2SIpf8BfKuqvpfkSm
C+qvYD08DXkhwAHgMu3rCKJUmr6hroVXUP8JZV2q9Ydvw08L7+liZJWg93ikpSIwx0SWqEgS5JjTDQJakRBrokNcJAl6RGGOiS1AgDXZIaYaBLUiMMdElqhIEuSY0w0CWpEQa6JDXCQJekRhjoktQIA12SGmGgS1IjDHRJaoSBLkmN6BroSXYkmU1yf5L7knx8lTHnJHkiyV2dzxWrXUuStHG6/kg0cBj4ZFXdmeT3gTuS3FxV968Y98Oqek//S5Qk9aLrE3pVPVxVd3aOfwUsAKdsdGGSpPVZ1xx6kp3AW4DbVul+e5K7k9yU5E1r/Pm9SeaTzC8uLq67WEnS2noO9CSvBL4NfKKqnlzRfSfw2qo6E7gK+O5q16iqfVU1XlXjIyMjR1uzJGkVPQV6kmGWwvzrVfWdlf1V9WRVPdU5vhEYTrK9r5VKko6ol1UuAaaBhar6/BpjXtMZR5KzOtd9tJ+FSpKOrJdVLu8A3g/8T5K7Om3/CJwGUFVfAS4CPpLkMPAb4OKqqg2oV5K0hq6BXlVzQLqMuRq4ul9FSZLWz52iktQIA12SGmGgS1IjDHRJaoSBLkmNMNAlqREGurTMzMwMY2NjDA0NMTY2xszMzKBLknrWy8Yi6SVhZmaGyclJpqen2bVrF3Nzc0xMTACwZ8+eAVcndZdBbegcHx+v+fn5gdxbWs3Y2BhXXXUVu3fvfqFtdnaWyy+/nHvvvXeAlUm/leSOqhpftc9Al5YMDQ3x9NNPMzw8/ELboUOH2LZtG88999wAK5N+60iB7hy61DE6Osrc3NyL2ubm5hgdHR1QRdL6GOhSx+TkJBMTE8zOznLo0CFmZ2eZmJhgcnJy0KVJPfGlqNTx/IvPyy+/nIWFBUZHR5mamvKFqLYM59AlaQtxDl2SXgIMdElqhIEuLeNOUW1lvhSVOtwpqq3Ol6JShztFtRUc00vRJDuSzCa5P8l9ST6+ypgk+WKSA0nuSfLWfhQuHU8LCwvs2rXrRW27du1iYWFhQBVJ69PLHPph4JNVdQZwNvDRJGesGHM+cHrnsxf4cl+rlI4Dd4pqq+sa6FX1cFXd2Tn+FbAAnLJi2IXAdbXkVuCkJCf3vVppA7lTVFvdul6KJtkJvAW4bUXXKcCDy84PdtoeXvHn97L0BM9pp522vkqlDbZnzx5+9KMfcf755/PMM89w4okncumll/pCVFtGz8sWk7wS+Dbwiap68mhuVlX7qmq8qsZHRkaO5hLShpmZmeGGG27gpptu4tlnn+Wmm27ihhtucOmitoyeAj3JMEth/vWq+s4qQx4Cdiw7P7XTJm0ZU1NTTE9Ps3v3boaHh9m9ezfT09NMTU0NujSpJ72scgkwDSxU1efXGLYf+EBntcvZwBNV9fAaY6VNyVUu2up6eUJ/B/B+4Nwkd3U+707y4SQf7oy5EXgAOAD8C/B3G1OutHFc5aKtrutL0aqaA9JlTAEf7VdR0iA8v8pl5U5Rp1y0Vbj1X+rw+9C11bn1X5K2EL8PXZJeAgx0SWqEgS5JjTDQJakRBrokNcJAl6RGGOiS1AgDXZIaYaBLUiMMdElqhIEuSY0w0CWpEQa6JDXCQJekRhjoktQIA12SGmGgS1IjugZ6kmuSPJLk3jX6z0nyxLIfkL6i/2VKkrrp5TdFvwpcDVx3hDE/rKr39KUiSdJR6fqEXlW3AI8dh1okScegX3Pob09yd5KbkrxprUFJ9iaZTzK/uLjYp1tLkqA/gX4n8NqqOhO4CvjuWgOral9VjVfV+MjISB9uLUl63jEHelU9WVVPdY5vBIaTbD/myiRJ63LMgZ7kNUnSOT6rc81Hj/W6kqT16brKJckMcA6wPclB4NPAMEBVfQW4CPhIksPAb4CLq6o2rGJJ0qq6BnpV7enSfzVLyxolSQPkTlFJaoSBLkmNMNAlqREGuiQ1wkCXpEYY6JLUCANdkhphoEtSIwx0SWqEgS5JjTDQJakRBrokNcJAl6RGGOiS1AgDXZ
IaYaBLUiMMdElqhIEuSY0w0CWpEV0DPck1SR5Jcu8a/UnyxSQHktyT5K39L1OS1E0vT+hfBc47Qv/5wOmdz17gy8deliRpvboGelXdAjx2hCEXAtfVkluBk5Kc3K8CJUm96ccc+inAg8vOD3bafkeSvUnmk8wvLi724daSpOcd15eiVbWvqsaranxkZOR43lqSmtePQH8I2LHs/NROmyTpOOpHoO8HPtBZ7XI28ERVPdyH60qS1uGEbgOSzADnANuTHAQ+DQwDVNVXgBuBdwMHgF8Df7NRxUqS1tY10KtqT5f+Aj7at4okSUfFnaKS1AgDXZIaYaBLUiMMdElqhIEuSY0w0CWpEQa6JDXCQJekRhjoktQIA12SGmGgS1IjDHRJaoSBLkmNMNAlqREGuiQ1wkCXpEYY6JLUCANdkhphoEtSI3oK9CTnJflZkgNJPrVK/yVJFpPc1fl8qP+lSpKOpOuPRCcZAr4EvAs4CNyeZH9V3b9i6Der6rINqFGS1INentDPAg5U1QNV9SzwDeDCjS1LkrRevQT6KcCDy84PdtpWem+Se5Jcn2THahdKsjfJfJL5xcXFoyhXkrSWfr0U/U9gZ1X9KXAzcO1qg6pqX1WNV9X4yMhIn24tSYLeAv0hYPkT96mdthdU1aNV9Uzn9F+Bt/WnPElSr3oJ9NuB05O8LsnLgYuB/csHJDl52ekFwEL/SpQk9aLrKpeqOpzkMuD7wBBwTVXdl+RKYL6q9gMfS3IBcBh4DLhkA2uWJK0iVTWQG4+Pj9f8/PxA7i1JW1WSO6pqfLU+d4pKUiMMdElqhIEuSY0w0CWpEQa6JDXCQJekRhjoktQIA12SGmGgS1IjDHRJaoSBLkmNMNAlqREGuiQ1wkCXpEZ0/T506aUkye+0DeorpqX18gld6lgtzI/ULm02BrokNcJAl6RGGOiS1IieAj3JeUl+luRAkk+t0n9ikm92+m9LsrPfhUqSjqxroCcZAr4EnA+cAexJcsaKYRPA41X1euALwOf6Xagk6ch6eUI/CzhQVQ9U1bPAN4ALV4y5ELi2c3w98M64NECSjqte1qGfAjy47Pwg8Gdrjamqw0meAF4N/GL5oCR7gb0Ap5122lGWrJe0z7xqwy5dn/6D43/fzzyxMdfVS9Jx3VhUVfuAfQDj4+Pu1tD6bWAAHukflW4u0lbQy5TLQ8COZeendtpWHZPkBOBVwKP9KFCS1JteAv124PQkr0vycuBiYP+KMfuBD3aOLwJ+UD7SaItZ66+sf5W1VXSdcunMiV8GfB8YAq6pqvuSXAnMV9V+YBr4WpIDwGMshb605Rje2sp6mkOvqhuBG1e0XbHs+Gngff0tTZK0Hu4UlaRGGOiS1AgDXZIaYaBLUiMyqLf6SRaBnw/k5lJ321mx01naJF5bVSOrdQws0KXNLMl8VY0Pug5pPZxykaRGGOiS1AgDXVrdvkEXIK2Xc+iS1Aif0CWpEQa6JDXCQJeWSXJNkkeS3DvoWqT1MtClF/sqcN6gi5COhoEuLVNVt7D0nf7SlmOgS1IjDHRJaoSBLkmNMNAlqREGurRMkhngx8AbkxxMMjHomqReufVfkhrhE7okNcJAl6RGGOiS1AgDXZIaYaBLUiMMdElqhIEuSY34f+5umTA2mWbVAAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light",
"tags": []
},
"output_type": "display_data"
}
],
"source": [
"plt.boxplot(df['trip_duration'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "YMHAxF7h7W1m",
"outputId": "9f9b589f-d945-4c29-fa87-9f5161404067"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Instances with trip duration greater than 2092.0 are outliers as per Boxplot analysis.\n"
]
}
],
"source": [
"# Quartiles of trip_duration, computed in a single call.\n",
"Q1, Q2, Q3 = np.percentile(df['trip_duration'], [25, 50, 75], interpolation='midpoint')\n",
"IQR = Q3 - Q1\n",
"# Boxplot fences: 1.5 * IQR below the first / above the third quartile.\n",
"low_lim = Q1 - 1.5 * IQR\n",
"up_lim = Q3 + 1.5 * IQR\n",
"print(\n",
"    \"Instances with trip duration greater than {} are outliers as per Boxplot analysis.\".format(up_lim)\n",
")"
]
},
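{
"cell_type": "markdown",
"metadata": {
"id": "4igtTKF7hPu2"
},
"source": [
"The boxplot rule above flags trips longer than about 2092 seconds. We apply a more lenient cutoff and treat instances with trip duration >= 5900 seconds as outliers.\n",
"A trip duration < 60 seconds (~1 min) does not make much sense either; note, however, that the filter for such instances is currently commented out in the next cell, so they are not removed."
]
},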
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "sxsY-b-IGWFl"
},
"outputs": [],
"source": [
"# Keep only the rows whose trip_duration is below 5900 seconds.\n",
"df = df.loc[df['trip_duration'] < 5900]\n",
"# Lower-bound filter, currently disabled:\n",
"# df = df[(df.trip_duration > 60)]"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "KkVIECiehPu2"
},
"source": [
"Instances with passenger_count = 0 also need to be removed."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "YWyc3pQjhPu2"
},
"outputs": [],
"source": [
"# Keep only the rows that report at least one passenger.\n",
"df = df.loc[df['passenger_count'] > 0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 320
},
"id": "TvMqWf8jT1ab",
"outputId": "c9e22ea9-3cb5-4c14-b4c2-bb7b31de5062"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>vendor_id</th>\n",
" <th>passenger_count</th>\n",
" <th>pickup_longitude</th>\n",
" <th>pickup_latitude</th>\n",
" <th>dropoff_longitude</th>\n",
" <th>dropoff_latitude</th>\n",
" <th>trip_duration</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1.455957e+06</td>\n",
" <td>1.455957e+06</td>\n",
" <td>1.455957e+06</td>\n",
" <td>1.455957e+06</td>\n",
" <td>1.455957e+06</td>\n",
" <td>1.455957e+06</td>\n",
" <td>1.455957e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>1.534271e+00</td>\n",
" <td>1.664020e+00</td>\n",
" <td>-7.397352e+01</td>\n",
" <td>4.075095e+01</td>\n",
" <td>-7.397343e+01</td>\n",
" <td>4.075181e+01</td>\n",
" <td>8.346726e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>4.988243e-01</td>\n",
" <td>1.313639e+00</td>\n",
" <td>7.087820e-02</td>\n",
" <td>3.283941e-02</td>\n",
" <td>7.060772e-02</td>\n",
" <td>3.584790e-02</td>\n",
" <td>6.483105e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>1.000000e+00</td>\n",
" <td>1.000000e+00</td>\n",
" <td>-1.219333e+02</td>\n",
" <td>3.435970e+01</td>\n",
" <td>-1.219333e+02</td>\n",
" <td>3.218114e+01</td>\n",
" <td>1.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>1.000000e+00</td>\n",
" <td>1.000000e+00</td>\n",
" <td>-7.399187e+01</td>\n",
" <td>4.073737e+01</td>\n",
" <td>-7.399133e+01</td>\n",
" <td>4.073590e+01</td>\n",
" <td>3.970000e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>2.000000e+00</td>\n",
" <td>1.000000e+00</td>\n",
" <td>-7.398174e+01</td>\n",
" <td>4.075411e+01</td>\n",
" <td>-7.397975e+01</td>\n",
" <td>4.075453e+01</td>\n",
" <td>6.610000e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>2.000000e+00</td>\n",
" <td>2.000000e+00</td>\n",
" <td>-7.396735e+01</td>\n",
" <td>4.076836e+01</td>\n",
" <td>-7.396302e+01</td>\n",
" <td>4.076982e+01</td>\n",
" <td>1.072000e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>2.000000e+00</td>\n",
" <td>9.000000e+00</td>\n",
" <td>-6.133553e+01</td>\n",
" <td>5.188108e+01</td>\n",
" <td>-6.133553e+01</td>\n",
" <td>4.392103e+01</td>\n",
" <td>5.897000e+03</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" vendor_id passenger_count ... dropoff_latitude trip_duration\n",
"count 1.455957e+06 1.455957e+06 ... 1.455957e+06 1.455957e+06\n",
"mean 1.534271e+00 1.664020e+00 ... 4.075181e+01 8.346726e+02\n",
"std 4.988243e-01 1.313639e+00 ... 3.584790e-02 6.483105e+02\n",
"min 1.000000e+00 1.000000e+00 ... 3.218114e+01 1.000000e+00\n",
"25% 1.000000e+00 1.000000e+00 ... 4.073590e+01 3.970000e+02\n",
"50% 2.000000e+00 1.000000e+00 ... 4.075453e+01 6.610000e+02\n",
"75% 2.000000e+00 2.000000e+00 ... 4.076982e+01 1.072000e+03\n",
"max 2.000000e+00 9.000000e+00 ... 4.392103e+01 5.897000e+03\n",
"\n",
"[8 rows x 7 columns]"
]
},
"execution_count": 8,
"metadata": {
"tags": []
},
"output_type": "execute_result"
}
],
"source": [
"df.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "nDl5UVbAhPu2"
},
"source": [
"# Feature Extraction"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "i2SF-MV1hr_z"
},
"outputs": [],
"source": [
"# Target column.\n",
"y = df['trip_duration']\n",
"# Feature frame: every column except the target and 'id'.\n",
"X = df.drop(columns=['trip_duration', 'id'])"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "UbsYDCWfsV6a"
},
"source": [
"### Encoding vendor_id"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "rD0BmhbJhPu3",
"outputId": "9abe1d57-8a96-46f1-a1a7-a0672f9dcf38",
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Vendor list : [2 1]\n"
]
}
],
"source": [
"# Distinct vendor ids, in order of first appearance.\n",
"vendor_id_list = X['vendor_id'].unique()\n",
"print(\"Vendor list :\", vendor_id_list)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "SYoAyI12hPu3"
},
"source": [
"There are two unique vendors in the dataset.\n",
"Since this is categorical data, we can perform one hot encoding on it."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "rWHK4tinhr_8",
"scrolled": true
},
"outputs": [],
"source": [
"# One-hot encode vendor_id: each distinct value gets its own vendor_id_<value> column.\n",
"encoded_vendor_id = pd.get_dummies(X['vendor_id'], prefix='vendor_id')\n",
"# Swap the raw column for its encoded columns (joined on the row index).\n",
"X = X.drop(columns='vendor_id').join(encoded_vendor_id)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "_ualwbX7yq6k"
},
"source": [
"### Encoding store_and_fwd_flag"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "GHWPtsgfhPu3",
"outputId": "9371b984-4b9c-4544-840c-a7ec8dff9b88"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Flag Values : ['N' 'Y']\n"
]
}
],
"source": [
"# Distinct store_and_fwd_flag values, in order of first appearance.\n",
"flag_values = X['store_and_fwd_flag'].unique()\n",
"print(\"Flag Values :\", flag_values)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "R5LDruDZx7TM"
},
"outputs": [],
"source": [
"# One-hot encode store_and_fwd_flag = {Y, N} into flag_N and flag_Y indicator columns\n",
"encoded_flag_id = pd.get_dummies(X['store_and_fwd_flag'], prefix='flag')\n",
"# Swap the raw flag column for its indicator columns\n",
"X = X.drop(columns='store_and_fwd_flag').join(encoded_flag_id)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "O1WUDPE1hPu3"
},
"source": [
"### Calculating distance related features"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "6GIJD3IGhsAA"
},
"outputs": [],
"source": [
"# Absolute pickup-to-dropoff offsets along latitude and longitude (same unit as the coordinates)\n",
"X['lat_diff'] = (X['pickup_latitude'] - X['dropoff_latitude']).abs()\n",
"X['long_diff'] = (X['pickup_longitude'] - X['dropoff_longitude']).abs()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "OhMPK8SZhPu3"
},
"outputs": [],
"source": [
"def get_euclidean_dist(p_lat, p_long, d_lat, d_long):\n",
"    \"\"\"Straight-line distance between pickup and dropoff in coordinate space.\n",
"\n",
"    The inputs are treated as plain planar coordinates, so the result is in the\n",
"    same unit as the inputs rather than in kilometres. Works element-wise on\n",
"    scalars or NumPy arrays.\n",
"    \"\"\"\n",
"    lat_delta = p_lat - d_lat\n",
"    long_delta = p_long - d_long\n",
"    return np.sqrt(np.power(lat_delta, 2) + np.power(long_delta, 2))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "pWybkDgQhPu3"
},
"outputs": [],
"source": [
"def get_haversine_dist(lat1, lng1, lat2, lng2):\n",
"    \"\"\"Great-circle (haversine) distance in kilometres between two points.\n",
"\n",
"    All four arguments are in degrees and may be scalars or NumPy arrays\n",
"    (evaluated element-wise). A mean Earth radius of 6371 km is used.\n",
"    \"\"\"\n",
"    lat1, lng1, lat2, lng2 = map(np.radians, (lat1, lng1, lat2, lng2))\n",
"    AVG_EARTH_RADIUS = 6371  # in km\n",
"    lat = lat2 - lat1\n",
"    lng = lng2 - lng1\n",
"    d = np.sin(lat * 0.5) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(lng * 0.5) ** 2\n",
"    # Floating-point rounding can push d marginally outside [0, 1] for (near-)antipodal\n",
"    # points, which would make arcsin return NaN; clamp before taking the root.\n",
"    d = np.clip(d, 0.0, 1.0)\n",
"    h = 2 * AVG_EARTH_RADIUS * np.arcsin(np.sqrt(d))\n",
"    return h"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "ZBKo6Af6hPu3"
},
"outputs": [],
"source": [
"\n",
"def get_manhattan_distance(lat1, lng1, lat2, lng2):\n",
"    \"\"\"Sum of two haversine legs from the first point: one changing only the\n",
"    longitude (to lng2), the other changing only the latitude (to lat2).\n",
"    \"\"\"\n",
"    longitude_leg = get_haversine_dist(lat1, lng1, lat1, lng2)\n",
"    latitude_leg = get_haversine_dist(lat1, lng1, lat2, lng1)\n",
"    return longitude_leg + latitude_leg"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "ooqpy-bghPu3"
},
"outputs": [],
"source": [
"# Extract the coordinate columns once as NumPy arrays: pickup lat/long, then dropoff lat/long\n",
"trip_coords = [\n",
"    X[col].to_numpy()\n",
"    for col in ('pickup_latitude', 'pickup_longitude', 'dropoff_latitude', 'dropoff_longitude')\n",
"]\n",
"X['euclidean_dist'] = get_euclidean_dist(*trip_coords)\n",
"X['haversine_dist'] = get_haversine_dist(*trip_coords)\n",
"X['manhattan_dist'] = get_manhattan_distance(*trip_coords)\n",
"# Disabled: a speed feature would need the target y, which is unknown at prediction time\n",
"# X['speed_haversine'] = X['manhattan_dist'] / y\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ON03y96nhPu3"
},
"source": [
"### Calculating time related features"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Rt3E99CkhsAP"
},
"outputs": [],
"source": [
"# Parse the pickup timestamp strings; values that do not match the format become NaT\n",
"X['pickup_datetime'] = pd.to_datetime(\n",
"    X['pickup_datetime'], format='%Y-%m-%d %H:%M:%S', errors='coerce'\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "iBjPHG7m7g6v",
"outputId": "42d69795-9597-4d83-884e-2f1b4afc0104"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:16: FutureWarning: Series.dt.weekofyear and Series.dt.week have been deprecated. Please use Series.dt.isocalendar().week instead.\n",
" app.launch_new_instance()\n"
]
}
],
"source": [
"# Calendar features derived from the parsed pickup timestamp\n",
"X['pickup_day_of_the_week'] = X['pickup_datetime'].dt.dayofweek\n",
"\n",
"X['pickup_hour'] = X['pickup_datetime'].dt.hour\n",
"\n",
"X['pickup_month'] = X['pickup_datetime'].dt.month\n",
"\n",
"X['pickup_day_of_year'] = X['pickup_datetime'].dt.dayofyear\n",
"# Series.dt.weekofyear is deprecated (and removed in pandas 2.0); isocalendar().week is its\n",
"# replacement. It returns a nullable UInt32 column, so cast back to the plain NumPy dtype\n",
"# weekofyear used to give: int64, or float64 when some timestamps were coerced to NaT.\n",
"iso_week = X['pickup_datetime'].dt.isocalendar().week\n",
"X['pickup_week_of_year'] = iso_week.astype('float64' if iso_week.isna().any() else 'int64')\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 383
},
"id": "Q5n8YUbKWQUU",
"outputId": "217cc1e1-c05e-4d96-eddc-63d67956a564"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>passenger_count</th>\n",
" <th>pickup_longitude</th>\n",
" <th>pickup_latitude</th>\n",
" <th>dropoff_longitude</th>\n",
" <th>dropoff_latitude</th>\n",
" <th>vendor_id_1</th>\n",
" <th>vendor_id_2</th>\n",
" <th>flag_N</th>\n",
" <th>flag_Y</th>\n",
" <th>lat_diff</th>\n",
" <th>long_diff</th>\n",
" <th>euclidean_dist</th>\n",
" <th>haversine_dist</th>\n",
" <th>manhattan_dist</th>\n",
" <th>pickup_day_of_the_week</th>\n",
" <th>pickup_hour</th>\n",
" <th>pickup_month</th>\n",
" <th>pickup_day_of_year</th>\n",
" <th>pickup_week_of_year</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>-73.982155</td>\n",
" <td>40.767937</td>\n",
" <td>-73.964630</td>\n",
" <td>40.765602</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.002335</td>\n",
" <td>0.017525</td>\n",
" <td>0.017680</td>\n",
" <td>1.498521</td>\n",
" <td>1.735433</td>\n",
" <td>0</td>\n",
" <td>17</td>\n",
" <td>3</td>\n",
" <td>74</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>-73.980415</td>\n",
" <td>40.738564</td>\n",
" <td>-73.999481</td>\n",
" <td>40.731152</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.007412</td>\n",
" <td>0.019066</td>\n",
" <td>0.020456</td>\n",
" <td>1.805507</td>\n",
" <td>2.430506</td>\n",
" <td>6</td>\n",
" <td>0</td>\n",
" <td>6</td>\n",
" <td>164</td>\n",
" <td>23</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>-73.979027</td>\n",
" <td>40.763939</td>\n",
" <td>-74.005333</td>\n",
" <td>40.710087</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.053852</td>\n",
" <td>0.026306</td>\n",
" <td>0.059934</td>\n",
" <td>6.385098</td>\n",
" <td>8.203575</td>\n",
" <td>1</td>\n",
" <td>11</td>\n",
" <td>1</td>\n",
" <td>19</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>-74.010040</td>\n",
" <td>40.719971</td>\n",
" <td>-74.012268</td>\n",
" <td>40.706718</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.013252</td>\n",
" <td>0.002228</td>\n",
" <td>0.013438</td>\n",
" <td>1.485498</td>\n",
" <td>1.661331</td>\n",
" <td>2</td>\n",
" <td>19</td>\n",
" <td>4</td>\n",
" <td>97</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" <td>-73.973053</td>\n",
" <td>40.793209</td>\n",
" <td>-73.972923</td>\n",
" <td>40.782520</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.010689</td>\n",
" <td>0.000130</td>\n",
" <td>0.010690</td>\n",
" <td>1.188588</td>\n",
" <td>1.199457</td>\n",
" <td>5</td>\n",
" <td>13</td>\n",
" <td>3</td>\n",
" <td>86</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>6</td>\n",
" <td>-73.982857</td>\n",
" <td>40.742195</td>\n",
" <td>-73.992081</td>\n",
" <td>40.749184</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.006989</td>\n",
" <td>0.009224</td>\n",
" <td>0.011572</td>\n",
" <td>1.098942</td>\n",
" <td>1.554180</td>\n",
" <td>5</td>\n",
" <td>22</td>\n",
" <td>1</td>\n",
" <td>30</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>4</td>\n",
" <td>-73.969017</td>\n",
" <td>40.757839</td>\n",
" <td>-73.957405</td>\n",
" <td>40.765896</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.008057</td>\n",
" <td>0.011612</td>\n",
" <td>0.014133</td>\n",
" <td>1.326279</td>\n",
" <td>1.873902</td>\n",
" <td>4</td>\n",
" <td>22</td>\n",
" <td>6</td>\n",
" <td>169</td>\n",
" <td>24</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>1</td>\n",
" <td>-73.969276</td>\n",
" <td>40.797779</td>\n",
" <td>-73.922470</td>\n",
" <td>40.760559</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.037220</td>\n",
" <td>0.046806</td>\n",
" <td>0.059801</td>\n",
" <td>5.714981</td>\n",
" <td>8.078684</td>\n",
" <td>5</td>\n",
" <td>7</td>\n",
" <td>5</td>\n",
" <td>142</td>\n",
" <td>20</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>1</td>\n",
" <td>-73.999481</td>\n",
" <td>40.738400</td>\n",
" <td>-73.985786</td>\n",
" <td>40.732815</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.005585</td>\n",
" <td>0.013695</td>\n",
" <td>0.014790</td>\n",
" <td>1.310353</td>\n",
" <td>1.774804</td>\n",
" <td>4</td>\n",
" <td>23</td>\n",
" <td>5</td>\n",
" <td>148</td>\n",
" <td>21</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>1</td>\n",
" <td>-73.981049</td>\n",
" <td>40.744339</td>\n",
" <td>-73.973000</td>\n",
" <td>40.789989</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.045650</td>\n",
" <td>0.008049</td>\n",
" <td>0.046355</td>\n",
" <td>5.121162</td>\n",
" <td>5.754187</td>\n",
" <td>3</td>\n",
" <td>21</td>\n",
" <td>3</td>\n",
" <td>70</td>\n",
" <td>10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" passenger_count pickup_longitude ... pickup_day_of_year pickup_week_of_year\n",
"0 1 -73.982155 ... 74 11\n",
"1 1 -73.980415 ... 164 23\n",
"2 1 -73.979027 ... 19 3\n",
"3 1 -74.010040 ... 97 14\n",
"4 1 -73.973053 ... 86 12\n",
"5 6 -73.982857 ... 30 4\n",
"6 4 -73.969017 ... 169 24\n",
"7 1 -73.969276 ... 142 20\n",
"8 1 -73.999481 ... 148 21\n",
"9 1 -73.981049 ... 70 10\n",
"\n",
"[10 rows x 19 columns]"
]
},
"execution_count": 21,
"metadata": {
"tags": []
},
"output_type": "execute_result"
}
],
"source": [
"# Drop the raw pickup/dropoff timestamp columns, then preview the first ten rows\n",
"X = X.drop(columns=['pickup_datetime', 'dropoff_datetime'])\n",
"X.head(10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "JsXogqJlWQUW",
"outputId": "46cdd2f6-db4c-4013-df99-618fbb0a74a5",
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"List of features : ['passenger_count' 'pickup_longitude' 'pickup_latitude'\n",
" 'dropoff_longitude' 'dropoff_latitude' 'vendor_id_1' 'vendor_id_2'\n",
" 'flag_N' 'flag_Y' 'lat_diff' 'long_diff' 'euclidean_dist'\n",
" 'haversine_dist' 'manhattan_dist' 'pickup_day_of_the_week' 'pickup_hour'\n",
" 'pickup_month' 'pickup_day_of_year' 'pickup_week_of_year']\n"
]
}
],
"source": [
"# Names of the training columns, in column order\n",
"feature_list = X.columns.to_numpy()\n",
"print(f\"List of features : {feature_list}\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "bQMrzdyGhPu4"
},
"source": [
"# Training"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "X3cvffRoUYJE"
},
"source": [
"Random Forest Regression"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "eEEy6Yqj7g6v"
},
"outputs": [],
"source": [
"# Alias for the feature matrix: X_temp and X refer to the same DataFrame (no copy is made)\n",
"X_temp = X"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "LRtGHpn_7g6x"
},
"outputs": [],
"source": [
"# X_train, X_test, y_train, y_test = train_test_split(X_temp, y, test_size = 0.2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "cKgvCqtVa1KB",
"outputId": "876330bc-c1f6-4606-8308-6dc7ac3e9403"
},
"outputs": [
{
"data": {
"text/plain": [
"RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',\n",
" max_depth=None, max_features='auto', max_leaf_nodes=None,\n",
" max_samples=None, min_impurity_decrease=0.0,\n",
" min_impurity_split=None, min_samples_leaf=1,\n",
" min_samples_split=2, min_weight_fraction_leaf=0.0,\n",
" n_estimators=50, n_jobs=-1, oob_score=False,\n",
" random_state=42, verbose=0, warm_start=False)"
]
},
"execution_count": 25,
"metadata": {
"tags": []
},
"output_type": "execute_result"
}
],
"source": [
"# 50-tree random forest with a fixed seed; n_jobs=-1 uses all available CPU cores\n",
"regr = RandomForestRegressor(n_estimators=50, random_state=42, n_jobs=-1)\n",
"# Fit on every row of X_temp; no rows are held out for validation here\n",
"regr.fit(X_temp, y)\n",
"# Hold-out variant (needs the commented-out train_test_split cell above):\n",
"# regr.fit(X_train, y_train)\n",
"# Y_test = regr.predict(X_test)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "PVHcFd-NWQUY",
"outputId": "3a02b881-0a64-44ff-bbea-2c8911913925",
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"0.3389718898953847"
]
},
"execution_count": 28,
"metadata": {
"tags": []
},
"output_type": "execute_result"
}
],
"source": [
"# Hold-out evaluation (root mean squared log error), currently disabled: it needs the\n",
"# X_test / y_test split and the X_train fit that are commented out in the cells above.\n",
"# NOTE(review): as written this cell produces no output, so any output stored with it\n",
"# comes from an earlier run in which these lines were active.\n",
"# from sklearn.metrics import mean_squared_log_error\n",
"# Y_test = regr.predict(X_test)\n",
"# Y_test_temp = np.round(Y_test)\n",
"# np.sqrt(mean_squared_log_error(y_test, Y_test_temp))"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "reocx_zPmchc"
},
"source": [
"# Feature Extraction on test data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "gyqmdO6IUln3"
},
"outputs": [],
"source": [
"# !unzip 'test.zip'\n",
"# Read the test rows from test.csv in the current working directory\n",
"df_test = pd.read_csv('test.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Oe62ewNmUzxu",
"outputId": "e993d3f6-6afa-426a-8ad6-ae3baa0f2e12"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:34: FutureWarning: Series.dt.weekofyear and Series.dt.week have been deprecated. Please use Series.dt.isocalendar().week instead.\n"
]
}
],
"source": [
"# Build the test feature matrix with the same steps that were applied to the training data.\n",
"# drop() returns a new frame, so df_test itself is not modified in this cell.\n",
"Xt = df_test.drop('id', axis=1)\n",
"\n",
"# One-hot encode vendor_id = {1, 2} into vendor_id_1 and vendor_id_2 columns\n",
"encoded_vendor_id = pd.get_dummies(Xt['vendor_id'], prefix='vendor_id')\n",
"Xt = Xt.drop('vendor_id', axis=1).join(encoded_vendor_id)\n",
"\n",
"# One-hot encode store_and_fwd_flag = {Y, N} into flag_N and flag_Y columns\n",
"encoded_flag_id = pd.get_dummies(Xt['store_and_fwd_flag'], prefix='flag')\n",
"Xt = Xt.drop('store_and_fwd_flag', axis=1).join(encoded_flag_id)\n",
"\n",
"# Distance-related features\n",
"Xt['lat_diff'] = abs(Xt['pickup_latitude'] - Xt['dropoff_latitude'])\n",
"Xt['long_diff'] = abs(Xt['pickup_longitude'] - Xt['dropoff_longitude'])\n",
"\n",
"test_coords = [\n",
"    Xt[col].to_numpy()\n",
"    for col in ('pickup_latitude', 'pickup_longitude', 'dropoff_latitude', 'dropoff_longitude')\n",
"]\n",
"Xt['euclidean_dist'] = get_euclidean_dist(*test_coords)\n",
"Xt['haversine_dist'] = get_haversine_dist(*test_coords)\n",
"Xt['manhattan_dist'] = get_manhattan_distance(*test_coords)\n",
"\n",
"# Time-related features; timestamps that do not match the format become NaT\n",
"Xt['pickup_datetime'] = pd.to_datetime(Xt['pickup_datetime'],\n",
"                                       format='%Y-%m-%d %H:%M:%S',\n",
"                                       errors='coerce')\n",
"\n",
"Xt['pickup_day_of_the_week'] = Xt['pickup_datetime'].dt.dayofweek\n",
"Xt['pickup_hour'] = Xt['pickup_datetime'].dt.hour\n",
"Xt['pickup_month'] = Xt['pickup_datetime'].dt.month\n",
"Xt['pickup_day_of_year'] = Xt['pickup_datetime'].dt.dayofyear\n",
"# Series.dt.weekofyear is deprecated (and removed in pandas 2.0); isocalendar().week is its\n",
"# replacement. It returns a nullable UInt32 column, so cast back to the plain NumPy dtype\n",
"# weekofyear used to give: int64, or float64 when some timestamps were coerced to NaT.\n",
"iso_week_test = Xt['pickup_datetime'].dt.isocalendar().week\n",
"Xt['pickup_week_of_year'] = iso_week_test.astype('float64' if iso_week_test.isna().any() else 'int64')\n",
"\n",
"Xt = Xt.drop('pickup_datetime', axis=1)\n",
"\n",
"# Keep exactly the training columns, in training order. This raises a KeyError right here if\n",
"# one is missing, and discards extras (e.g. a trip_duration column if df_test was extended\n",
"# by a later cell before this cell is re-run).\n",
"Xt = Xt[list(feature_list)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "rO_NDhHkremd"
},
"outputs": [],
"source": [
"# Predict a trip duration for every row of the test feature matrix\n",
"yt = regr.predict(Xt)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "e3MjSMbGss-S",
"outputId": "0c3cb275-6ae2-4d8a-a053-6066ae00c3d9"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ 869. 732. 434. ... 1497. 1971. 1214.]\n"
]
}
],
"source": [
"# Round the predictions to whole numbers (the array keeps its float dtype)\n",
"ytfinal = yt.round()\n",
"print(ytfinal)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "jfpYtSeVs-l5"
},
"outputs": [],
"source": [
"# NOTE: this adds a trip_duration column to df_test in place, so after this cell df_test\n",
"# no longer matches the contents of test.csv.\n",
"df_test['trip_duration'] = ytfinal.astype(int)\n",
"# Write only the id and predicted trip_duration columns, without the index\n",
"df_test.to_csv('rf_50est_dt.csv', columns=['id', 'trip_duration'], index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "FrCAAhDV78uc",
"outputId": "ec1bb1c0-40ee-4cca-d73b-3a0939f62ac2"
},
"outputs": [
{
"data": {
"text/plain": [
"array([4.84419100e-03, 3.32528093e-02, 2.80487417e-02, 3.33748071e-02,\n",
" 4.15970687e-02, 1.43424409e-03, 1.50826012e-03, 2.24943165e-04,\n",
" 2.19223631e-04, 1.92702488e-02, 2.36797637e-02, 2.33382653e-01,\n",
" 4.35189114e-01, 1.37538090e-02, 2.79698904e-02, 7.08421026e-02,\n",
" 2.49476933e-03, 2.14455316e-02, 7.46782927e-03])"
]
},
"execution_count": 31,
"metadata": {
"tags": []
},
"output_type": "execute_result"
}
],
"source": [
"# Importance of each feature in the fitted forest, one value per training column (same order as feature_list)\n",
"regr.feature_importances_"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 282
},
"id": "6ErJA2PH6lub",
"outputId": "969e5de9-40fd-4835-f07a-751464a5f771"
},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7f5e87780908>"
]
},
"execution_count": 32,
"metadata": {
"tags": []
},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAd8AAAD4CAYAAACt3uxiAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3debxcRZ3+8c9DWAwEoxhkAqjRGAVkCRAQQRBcQBZZNBERhCCCKJujoMzoQMSNxVF/yBoQgooQwoBGItuEhISwZg9hHUkUARFUAhGJEr6/P6paTpre7k133773Pu/XK690n6Wq+uBMpeqc85QiAjMzM2ufNXq6AWZmZv2NO18zM7M2c+drZmbWZu58zczM2sydr5mZWZut2dMNsM43ZMiQGDZsWE83w8ysV5kzZ86zEbFhpX3ufK2uYcOGMXv27J5uhplZryLpd9X2edrZzMyszdz5Wl2LnljW000wM+tT3Pl2KEknSnpQ0hOSzmtSmZJ0h6S9C9vGSLqpGeWbmVljfM+3c30R+HD+M6oZBUZESDoWmCRpGum//3eBjzajfDMza4xHvh1I0kXAO4AbgTcWtn9M0j2S5kn6X0kb5e0bSrpV0mJJl0r6naQhlcqOiPuBXwNfA04DfhoRv63QhmMkzZY0e+WLnnY2M2smeWGFziRpKWnEux8wKiKOl/RG4Lk8gv0csHlEfCVPSz8REd+T9FFSp71hRDxbpez1gLnAP3LZK2q1ZZ2hI2LFU48278eZmfUDkuZERMWZS0879y6bAhMlDQXWBpbk7e8HDgKIiJsk/bVWIRHxN0kTgeX1Ol4zM2s+Tzv3Lj8GzouIrYDPA69bjbJeyX/q2mqTwatRjZmZlXPn27sMBp7In48obJ8FfBJA0p4U7hObmVnncefbu4wjPak8Byjez/0msKek+4ExwB+BF9rfPDMza4QfuOoDJK0DrIyIlyW9D7gwIkY2q/xRo0aF4yXNzLrGD1z1fW8FrpG0BukJ5qN7uD1mZlaDO98+ICIeBbYtbpP0JmBqhcM/FBF/bkvDzMysIt/z7TDNipWMiD9HxMjiH2Ab4D8LdZ0saVy9spztbGbWXO58O88XgY8AX29B2SuAj1dLvzIzs/Zw59tBWhkrmb0MjAf+vZW/w8zManPn20Ei4ljgSWAPoJhSdQewU0RsC1wNfDVvPx24LSLeA1xLevCqnvOBQyXVTM5wtrOZWev4gaveoSmxkvm45yX9FDgR+HuN48aTRsmsM3SE30czM2sij3x7h2bGSgL8CDgKWK+Rgx0vaWbWXO58e4emxkpGxF+Aa0gdsJmZtZk7395hHM2PlfxvwE89m5n1AN/z7TARMSx/nJD/EBG/An5V4fBlwF6FWMkdai0RGBGDCp+fBtZtTqvNzKwr3Pn2bo6VNDPrhdz59mKOlTQz653c+fYxuYNt2opGkOIlh506haVn7tvMYs3M+i0/cNVGkoblh6NWt5yNJV1bZd90SRWXsMr7vyPpcUnLV7cdZmbWPe58O5ikijMTEfFkRIzuZrG/BnbsfqvMzGx1edq5CklnAo9HxPn5+zhgOSDSu7XrANdHxOmShpHymO8Adia9k3tARPxd0vbAZbnYWwrlvw64EBhFylz+ckRMkzQW+DgwCBgAfKBC24YBN0TElpIGApeTVix6CBhY63dFxN25jHq//xjgGIABr9+w5rFmZtY1HvlWN5EcYJF9EngGGEEaOY4Etpe0W94/Ajg/5yw/B3wib78cOCEitikr/zggcmrVIcAVuUMG2A4YHRGv6Xgr+ALwYkRsTsp63r4Lv7GqiBgfEaMiYtSAdZ1wZWbWTO58q4iIecCb8/3VbUgLHWwF7AnMA+YCm5E6XYAlETE/f54DDJP0BuANETEjb/9ZoYr3Az/PdT0E/A54V953a06hasRuhXIWAgu79EPNzKztPO1c2yRgNPBvpJHw24DvRcTFxYPyNH
Ax3GIldaZ/6/jbapzbdFttMpjZftLZzKxpPPKtbSLwKVIHPAm4GfispEEAkjaR9OZqJ0fEc8Bzkt6fNx1a2D2z9F3Su0iBGQ93o40zgE/ncrYEtu5GGWZm1kbufGuIiMXA+sATEfFURNwC/AK4S9Ii0hq669cp5kjgfEnzSQ9rlVwArJHLmQiMrRUNWcOFwCBJDwJnkKa8q5J0tqQ/AOtK+kN+kMzMzNpIEV6q1WobNWpUzJ49u6ebYWbWq0iaExEVcxc88jUzM2szP3DVwSRtxapPSAOsiIj3NnDuPaR3kYs+ExGLmtU+MzPrHo9826ir8ZIRsSgiRpb9eW8j8ZIR8d7yc4HfSpoi6SFJi3OQSF2lbGczM2sOd74drEXxkt+PiM1IqyHtImnvbjfQzMy6xdPOVfTFeMmIeBGYlj//Q9JcYNMqv9/xkmZmLeKRb3V9Ol4yp299jMpr/zpe0syshdz5VtGX4yXzdPZVwLkR8ViD9ZiZWZN42rm2vhovOR54NCJ+1MjBjpc0M2suj3xr63PxkpK+DQwGvtSNuszMrAk88q0hIhZL+le8JPCUpM1J8ZKQHsA6jDTSreZI4DJJQeGBK1K85IU5XvJlcrxkvXV2K7gQuDzHSz5IjXhJSZsCXyc9mDU313VeRFza1UrNzKz7HC9pdTle0sys6xwvaWZm1kE87dzBHC9pZtY3edrZ6lpn6IgYesSPWOonns3MGuZp5wZJulTSFjX2j5N0cjvbtLokjZS0T+F7r/sNZmZ9jTvfgoj4XEQ80NPtaLKRwD51jzIzs7bpl51vXl3oIUlXSnpQ0rWS1i2tCJSP+aikuZIWSHpNBKOkoyXdKGmgpOWF7aMlTcifJ0i6SNJsSY9I2q9Gm8ZK+qWkWyUtlXS8pC9Lmifpbkkb5ONG5u8LJV0v6Y15+3RJZ0m6N9e1q6S1gTOAgyXNl3Rwrm6LfPxjkk6s0p5jcrtnr3xxWfcutJmZVdQvO9/s3cAFORP5eeCLpR2SNgQuAT6RM5nHFE+UdDywH3BgRPy9Tj3DSFnQ+wIXFfKbK9mStKjCDsB3SJnN2wJ3AYfnY34KfC0itgYWkfKcS9aMiB1JARqnR8Q/gNOAiXlZwYn5uM2AvXK7Tpe0VnlDnO1sZtY6/bnzfTwiZuXPPydlLZfsBMyIiCUAZTnLhwN7kxY+KEZKVnNNRLwSEY8Cj5E6vmqmRcQLEfEMsAz4dd6+iJQVPZiUFX173n4FKdu55Lr89xxSp1/NlIhYERHPAn8CNmrgd5iZWZP05863/DHvRh/7XkTq2IpL8RXPLR/ZdqWeYmf+SuH7KzT2Wljp+JV1ji/Poa5Z9labDPaTzmZmTdSfO9+3Snpf/vxp0lq8JXcDu0l6O0Dpfms2D/g8MFnSxnnb05I2l7QGcFBZPWMkrSFpOPAOupffDEBELAP+KmnXvOkzwO01TgF4AVi/u3WamVnz9efO92HguJyJ/EZSRjIAedr3GOA6SQtICyxQ2H8HcDIwRdIQ4FTgBuBO4Kmyen4P3AvcCBwbES+tZruPAM6RtJD0JPMZdY6fRnrAqvjAlZmZ9aB+GbKRlwC8ISK2bHE9E3I917aynlZztrOZWdc5ZMPMzKyD9MuRb0+StBdwVtnmJRFRfq+4HW35EjA+Il6sdVwpXrLED1+ZmdVXa+TrhRXaLCJuBm4ufZe0ZkS83EPN+RLpNauana+ZmTVXr512rpFSdZqk+yTdL2m88orxkk6U9EBOhro6b/tAfhBpfk6SWj9vPyWXsVDSNwv1PSjpEkmLJd0iaWDet0M+dr6kcyTdn7cPyN9LZX0+b99d0kxJk4GqcZaSDs/nLZD0s0I7bsvbp0p6a94+QdLowrnLC3VNz9endL2Uk602BqZJmtbk/zxmZlZDr+18s0opVedFxA75YaqBpCQqSE8kb5uToY7N204GjouIkcCuwN8l7QmMIKU/jQS2l1QKshgBnB8R7wGeAz6Rt18OfD6Xs7LQvq
OAZRGxAym16ujS60vAdsBJEfGuSj9M0nuAbwAfzClbJ+VdPwauyL/jSuDcBq7TtqRR7hak1512iYhzgSeBPSJijwr1O17SzKxFenvnWymlag9J90haBHwQeE/evxC4UtJhQGmadxbwgzwKfEOe/t0z/5kHzCUlUo3Ixy+JiPn58xxS6tQbgPUj4q68/ReF9u0JHC5pPnAP8KZCWfeWErSq+CAwKadQFVO23leo42esmsxVzb0R8YeIeAWYT+30K3J9jpc0M2uR3n7Pt1J61AXAqIh4XNI4Xk2c2pcUxfgx4OuStoqIMyVNIa36Mys/DCXgexFxcbHg/HpSeTLUwDrtE3BCvs9bLGt34G+N/MAueJn8j6kc9rF2YV+XEq3MzKy1evvIt1pK1bOSBgGj4V+d0VsiYhrwNWAwMEjS8IhYFBFnAfeRRrk3A5/N5yNpE0lvrtaAiHgOeEHSe/OmTxV23wx8obRwgaR3SVqvwd92Gykd60353FLK1p2FOg4FZubPS4Ht8+f9gdcsllBBQ+lXpXjJ0h8zM1s9vX0EVEqpuoz04NKFpLSq+4E/kjpUgAHAz/PCBALOjYjnJH1L0h6k7OTFwI0RsULS5sBd+Vmt5cBhrHovt9xRwCWSXiHFPZZukl5KmuKdmx/8egY4sJEfFhGLJX0HuF3SStI0+FjgBOBySafk8o7Mp1wC/Conct1EYyPr8cBNkp6sdN/XzMxao9e+59uulKpGSBoUEaWni08FhkbESXVO6zWccGVm1nV+z7f19pX0H6Tr+TvSCNXMzKyiXtv5RsRS0uLzPS4vUj+x7oEV5Hu6Uyvs+lBE/Hm1GmZmZh2p13a+fUXuYEf2dDvMzKx9erzzlXQp8IOIqJj0lF8XWh4R329rw7pJ0vKIGLSaZZxDev3pNxFxSnNa1n2LnljGsFOnvGa7n3w2M+ueHu98I+JzPd2GDnQMsEFE1HrCuql6OGPazKxfadt7vjWymKdLGpWP+aikuTnL+DX3QSUdLelGSQNL2cV5+2iltXNLGccX5WjERyTtV15O4bwpkrbOn+dJOi1/PkPS0fnza3Ke8/bDJN2rlOd8saQBZWUPkXSXpIrDw5yvfI5SBvUi5YXulfKeBwFzStvKzltf0pLCu8OvL32XNFzSTZLmKGVHb5aP+VhO/Zon6X8lbZS3j5P0M0mzSGlZZmbWBu0O2aiUxQyApA1J76p+ImcZjymeKOl4Uk7zgRHx9zr1DCNlM+8LXCTpdVWOmwnsmt//fRnYJW/fFZihKjnP+T3gg0kZyaU850MLbd0ImAKcFhGvna9NPp7L3Ab4MHCOpKERsT/w94gYmR/kWkVEvABMz78NUuDGdRHxT9J7uydExPak3OoL8jF3ADtFxLbA1cBXC0VuAXw4Ig4p1iNnO5uZtUy7p53Ls5hPLOzbCZhRyjsuZBkDHA48Tup4/9lAPdfkHONHJT1GSq6aX+G4mbkNS0id5UckrQu8PSIezqPfUs4zpBHpCGBrUprUfTmIYyDwp3zMWqSnl4+LiNtrtPH9wFV5avlpSbeTFl+Y3MDvu5TUgf6SFLJxdE7k2hmYlNsEsE7+e1NgoqShpNjJYqb05Er/mImI8aTOnHWGjuidL4ObmXWodne+lbKYG7GINErclFc7juK55SPbRuu5DxgFPAbcCgwBjiYtmgDVc55PIK0s9B8Vynw5n78XKe2q6SJiVp7G3x0YEBH3S3o98FweiZf7Memhtsn5nHGFfc3OmDYzszraPe1cLYsZ4G5gN+Ul9wpZxpBGnp8HJkvaOG97WtLmSrnNB5XVM0bSGpKGk5bQe7hSYyLiH6QR9RjgLtJI+GRgRj6kWs7zVGB0/oykDSS9rVQs8FlgM0lfq3EtZgIHK635uyFp0Yd7axxf7qek1Y0uz7/leWCJpDG5TZK0TT52MPBE/nxEF+oAXpvt7IxnM7PV0+7Ot5TF/CApg/nC0o6IeIb0lO91SvnEq9zvjIg7SB3jFElDSO
vz3kBaaOCpsnp+T+rIbgSOjYiXarRpJvCnPPU6kzS6npnrvIXUwd2ltEThtaTlAx8grbV7i6SFpFHz0EJbVwKHAB+U9EUqu560zOEC0iIKX42IP9ZoZ7krSdfwqsK2Q4Gj8vVbDByQt48jTUfPAZ7tQh1mZtYCbct2VpuymPNTzzdExLWtrKenSRoNHBARn2l1Xc52NjPrOjnbuW+R9GNgb1IQh5mZ9TJt63zblcUcEWPLt0naCzirbPOSiCi/V9x0krbite/QroiI91Y6vuzcr1P2yhUwKSJOaFb7zMys/XrtkoLWPusMHRFDj/hRQ8f6QSwzs6TWtHO7H7jq1YqpWg0ev7uknQvfD5S0RfNbtkqdE/L9YCRdWqs+SWMLT4+bmVmbuPNtrd1JwRclB5ISpdoiIj5XbcGKbCzgztfMrM36ZOerV3OkJ+R85yslfVjSLEmPStox/7kr5x3fKend+dyxkq7LGcmPSjq7rOzvKGVP313ISH5NdnJ+uvtY4N+V8p8/AOxPipGcn3OYj1bKjV4g6X9yulZp9HpubtdjpZFsld8qSedJeljS/wJvLuybLmlUfpd4gl7Nkf73XOYo4MrcnoFl5Tpe0sysRfpk55u9E/hvUrTkZqRQj/eT3hX+T+AhYNecd3wa8N3CuSNJ2c1bkYIw3pK3rwfcnbOnZ5DSsKBCdnJ+wOwi4Ic5p/l2UnTkKfn7b0mZzDvk8h4Ejiq0YWhu737AmTV+50GkzOwtSDGcO1c4ZiSwSURsGRFbAZfnV7FmA4fm9qwSMRkR4yNiVESMGrDu4BrVm5lZV/XlV42WRMQiAEmLgakRETksYxgp9ekKSSNIqVRrFc6dGhHL8rkPAG8jJWH9gxTsASlC8iP5c63s5Fq2lPRt4A2k3OibC/t+mfOpHyiNsKvYjVczop+UdFuFYx4D3pFfUZoC3NJg+8zMrAX6cue7ovD5lcL3V0i/+1vAtIg4KE8RT69y7kpevU7/jFcfDy9ur5WdXMsE0mIRCySNJd0jrtQGsRoi4q85anIv0lT4J0kRmA3ZapPBzPZTzGZmTdOXp53rKeYdj21iWcXs5BeA9Wt8Xx94Smlt3kPpnhm8mhE9FNij/IAcx7lGRPwPKRZzuyrtMTOzNujPne/ZwPckzWP1ZwDGUTk7+dfAQfmBpl1J94NPyQ9mDQf+C7gHmEW6B90d1wOPAg+QFlu4q8IxmwDTJc0nLeVYWo1pAmm949c8cGVmZq3jkA2ry9nOZmZd55ANMzOzDtKXH7jqU1YnI9rMzDqLO98OIGl5RAyqsf8NpHeSR3ahzAnkpRUlXUp6GvsBSWOAM4A/RsQekq4C3kN69/eHlcpa9MQyhp06pSs/6V+c9Wxm9lrufHuHNwBfBC7ozskR8bnC16OAoyPiDkn/BuwQEe9sQhvNzKxBvufbQSQNkjRV0twcA3lA3nUmMDw/lXxOlXMbiZk8jZSa9ZNczi3AJoWnsc3MrA088u0sLwEHRcTz+d3cuyVNBk4Ftqwz7VyMmdyI9OrRZcUDIuIMSR8ETo6I2ZLOJ01Nv6ZcSccAxwAMeP2GTfhpZmZW4s63swj4rqTdSElcm5A60kY0EjPZsIgYD4yHtJ7v6pRlZmarcufbWQ4FNgS2j4h/SloKvK5nm+R4STOzZvM9384yGPhT7nj3IC3oAI3FQNaNmTQzs87gzrezXAmMyisvHU6OnIyIPwOz8nq8FR+4orGYSTMz6wCOl7S6HC9pZtZ1jpc0MzPrIH7gqpdxzKSZWe/nzreXiYhFQMMxk82wOvGS4IhJM7NyHT3tLOlSSVvU2D9O0sltast0SRXn7ptczxhJD0qa1uq6zMysZ3T0yLcsk7i/+Ff2crsqlLRmRLzcrvrMzPq7jhj5Shom6SFJV+ZR37WS1i2ONiV9NGceL5A0tUIZR0u6UdJAScsL20fnFX6QNEHSRZJmS3pE0n412jRQ0tW5PdcDAwv7LsxlLJb0zbztg5J+WTjmI/
m8auUfkvOb75d0Vt5Wnr1c6bwZkkYWvt8haRtJ60m6TNK9kuaVcqHztZ2Zr91cSTvn7bvn7ZNJryeV13NM/o2zV764rNrPMDOzbuikke+7gaMiYpaky0ir+AAgaUPgEmC3iFgiaYPiiZKOBz4CHBgRKyTVqmcYsCMwHJgm6Z0R8VKF474AvBgRm0vaGphb2Pf1iPiLpAHA1Lx/GnCBpA0j4hngSMqylQvt3Rg4C9ge+Ctwi6QDy7OXq7T/J8BY4EuS3gW8LiIWSPoucFtEfFZpCcJ78wILfwI+EhEvSRoBXAWUps+3I2VGLymvxPGSZmat0xEj3+zxiJiVP/+cNAIs2QmYUeokIuIvhX2HA3sDoyNiRQP1XBMRr0TEo8BjwGZVjtstt4OIWAgsLOz7pKS5wDzSWrhbRHph+mfAYbnzex9wY5WydwCmR8Qzebr3ylxfIyYB+0laC/gsMCFv3xM4VdJ8YDoplvKtwFrAJTm4YxJp4YWSeyt1vGZm1lqdNPItH101OtoqPf27KVDqSIrnlmcjd7ceACS9HTiZtA7uX/OUdqmOy4Ffk1YnmtSK+6gR8aKkW4EDgE+SRs+QFmX4REQ8XNbeccDTwDakf2wVR/l/a6ROZzubmTVXJ4183yrpffnzp4HiA0d3A7vljo+yaed5wOeByXk6F+BpSZtLWoO01F7RGElrSBoOvAN4mMpm5HYgaUtg67z99aROa5mkjUijbgAi4kngSeAbpI64mnuBD0gakqeuDwFur3F8uUuBc4H7IuKvedvNwAnKc+6Sts3bBwNPRcQrwGeAAV2ox8zMWqCTOt+HgeMkPQi8EbiwtCPfQz0GuE7SAmBi8cT8ZPDJwBSldXBPBW4A7gSeKqvn96TO70bg2Cr3e8n1D8rtOQOYk+taQOrwHwJ+AcwqO+9K0hT6g9V+aEQ8lds4DVgAzImIX1U7vsL5c4DnWbWD/xZpinmhpMX5O8AFwBH5um1Gg6NdMzNrnY7IdpY0jLSo+5YtrmdCrufaFtZxHjAvIn7Swjo2Jt3X3SyPaFvK2c5mZl3nbOc2kTSHND398xbWcThwD+mJ65Z3vGZm1nwd8cBVRCwFWjrqzfWMLd8maS/Saz9FSyKi/F5xI+VvX75N0j3AOmWbP5NjIquq066fdrVtZmbWOTpi2rkvk7Q8Igb1QL1LgVER8aykOyOiFK5xDrAP8BvgbNK98bWBEyNiZqWy1hk6IoYe8aPVbpMzns2sP6k17dwRI19rrVLHmx0DbBARKyV9CljUT2M8zcx6jO/5tomSc3Kc5CJJB+ftu+cYzWv1asRm6XWhffK2OZLOlXRDjfLfJOmWHHl5Kem939K+5fnvycAgYI6kr5FGvgdImi9pYMWCzcys6TzybZ+Pk8JAtgGGAPdJmpH3bUtKynqS9OrSLpJmAxfzaqTmVXXKPx24I0dU7ktaoGEVEbF/ngYfCSDpadLU9PHlx0o6hjRKZsDrN+z6rzUzs6o88m2f9wNXRcTKiHiaFKqxQ953b0T8IT+9PJ+UP70Z8Fgh/rFe51uMw5xCyozutogYHxGjImLUgHUHr05RZmZWxiPfzlDMpF5Jh/13cbykmVlzeeTbPjOBgyUNyKs07UZK2qrmYeAdOYAE4OA65RfjMPcmpYSZmVkH6qgRVh93PWmlowWkxRy+GhF/lFRxVaWI+LukLwI3SfobcF+d8r8JXJWjJe8kxWiamVkH8nu+HUzSoIhYnp9+Ph94NCJ+2O52OF7SzKzrHC/Zex2d1+ddTFqd6OIebo+ZmTWBp507WB7lrjLSlXQkcFLZobMi4ri2NczMzFaLp52trmbFSxY5atLM+rqWTDtLulTSFjX2j5N0cnfL72Jbpkuq+AObXM8YSQ9KmlZl/0hJ+xS+t+0adIWkCZJG93Q7zMz6q253vhHxuYh4oJmN6QWOAo6OiD2q7B9JWrTAzMysqrqdr6RhhczhB3MG8brF0aakj0qaK2mBpKkVyjha0o2SBp
ZyhvP20XmB+9Jo7CJJsyU9Imm/Gm0aKOnq3J7rgYGFfRfmMhZL+mbe9kFJvywc85F8XrXyD8n5y/dLOitvO42UUvWTvDJQ+TlrA2eQ3uWdX8puBrbI1+oxSScWjj9M0r352IslDajSljGSfpA/nyTpsfz5HZJm5c/bS7o9Z0DfLGlo3j5c0k15+8xKrzVJ+la+9gPKth+Tr+PslS8uq3apzMysGxod+b4buCAiNgeeB75Y2pEDIy4BPhER2wBjiidKOh7YDzgwIv5ep55hwI7AvsBFkl5X5bgvAC/m9pwOFNfR/XqeY98a+ICkrYFpwGa5rQBHApdVKljSxqR1dD9IGsnuIOnAiDgDmA0cGhGnlJ8XEf8ATgMmRsTIiJiYd20G7JV/1+mS1pK0OSk0Y5ecs7wSOLTKb50J7Jo/7wr8WdIm+fMMSWsBPwZG5/WELwO+k48fD5yQt58MXFD2W88BNgSOjIiVZb/H8ZJmZi3S6NPOj0fErPz558CJhX07ATNKGcQR8ZfCvsOBx0kd7z8bqOeanG/8aB7hbUbKOi63G3Burm+hpIWFfZ/MiwKsCQwFtsjH/Aw4TNLlpLCLw6u0YQdgekQ8AyDpylzfL6scX8+UiFgBrJD0J2Aj4EOkfzDcl17hZSDwp0on5yCOQZLWB94C/CK3Z1fgOtI/jLYEbs1lDQCekjQI2BmYlLcDrFMo+r+AeyLimG7+LjMz66ZGO9/yR6IbfUR6EWn0uClQWiCgeG75yLa79QAg6e2kEd4OEfHXPKVdquNy4NfAS8CkiHi5K2Wvhkq5zQKuiIj/aLCMO0mj9YdJI+HPkv4B8RXgrcDiiHhf8QRJrweeK61gVMF9wPaSNij7B9NrONvZzKy5Gp12fquk0v9z/zRwR2Hf3cBuueND0gaFffOAzwOT83QuwNOSNpe0BnBQWT1jJK0haTjwDlJnU0kxx3hL0hQzwOuBvwHLJG0E7F06ISKeJC3Z9w1SR1zNvaTp6iH5PughpBWIGvECsH4Dx00FRkt6c/4NG0h6W43jZ5L+UTGDdE33AFZExDLSNdqw9N8nT2u/JyKeB5ZIGpO3S9I2hTJvAs4EpuRRtZmZtUmjne/DwHGSHiQF9l9Y2pGnZ48BrvPimy0AABRHSURBVJO0AJhYPDEi7iB1HFMkDQFOBW4gjeaeKqvn96TO70bg2Ih4qUp7LgQG5facAczJdS0gdU4PkaZnZ5WddyVpCv3Baj80Ip7KbZxGymGeExG/qnZ8mWmkB6yKD1xVquMB0j8CbslT5reSpsirmUmacp6R780+Tv4HUL7XPBo4K1//+aTpZkj3kY/K2xcDB5S1YxLpfv1kSQMxM7O2qBuyobSqzg0RsWVLG5KmiG+IiGtbWMd5wLyI+Emr6uiLnO1sZtZ1qhGy0W/iJSXNIU1Jf6Wn22JmZv1b3c43IpaSnqZtqYgYW75N0l6k136KlkRE+b3iRsrfvnybpHtY9QlggM9ExKJaZTWzXc1oj5mZ9S79MttZ0qXAD6oldEkaByyPiO+3oO7dgZMjolaIyEhg44j4Tf6+P+mVqTMlHQg80tV0MUnTc71dnj92trOZWdfVmnbul0sK9oJozFViKiNickScmb8eCFTN1DYzs87XpztfdWA0ZlnZO0q6S9I8SXdKercqxFRKGivpPEk7A/sD5+R9w8t+yxBJS/PnWhGce+Z650qalAM5ytvmeEkzsxbp051v1mnRmEUPAbtGxLakaMrv1oipJCLuBCYDp+R9v61RdsUIzvy61zeAD0fEdqTIzC+Xn+x4STOz1ukPTzt3WjRm0WDgCkkjSGleazVQT6OqRXDuRJq2npVjJ9cG7mpivWZmVkd/6Hw7ORrzW8C0iDgov089vcG2Fb3MqzMYjYy2BdwaEYc0WoHjJc3Mmqs/TDt3WjRm0WDgifx5bGF7rZjK8n1LeXVVp9GF7dUiOO8GdpH0zrxvPUnvaqCtZmbWJP2h8+20aMyis4HvSZrHqr
MQtWIqrwZOyQ9pDQe+D3whlzGkcFy1CM5nSB39VXkq+i7SFLmZmbVJn37Pty9FY/Ykx0uamXWd3/M1MzPrIH36gau+Eo1pZmZ9S5+edrbmaEW8ZDnHTZpZX9Ovpp0lXSqpavyipHGSTm5R3btLuqEF5Z4h6cP585ckrduNMpbXP8rMzNqhz007R8TneroNzRYRpxW+fokUFvJiDzXHzMxWU68d+faC3OYNJP1S0kJJd0vaOm8fJ+my3M7HJJ1YOOe/JD0s6Q5JV5VG6LkNo/OxGwPTJE3L+6q1++05v3mRpG+Xte0USffltn2zSvud7Wxm1iK9tvPNOjm3+ZvAvIjYGvhP4KeFfZsBe+UyT5e0lqQdgE8A2wB7A6+5TxAR5wJPAntExB516v9/wIURsRWFd5Il7QmMyHWPBLaXtFuFupztbGbWIr298y3PbX5/YV+93Oa9gdERsaKBeq6JiFci4lGglNtcz/uBn+W6bwPeJOn1ed+UiFgREc8CfwI2AnYBfhURL0XEC8CvG6ijll2Aq/LnnxW275n/zAPm5t8yYjXrMjOzLujt93w7Obe5lmKHv5LV++/QlXZDynb+XkRc3GgFznY2M2uu3j7y7eTc5pnAobnu3YFnI+L5GsfPAj4m6XV5fd1q95bLs52rtXsW8Kn8+dDC9puBz+Y6kLSJpDc38HvMzKxJenvn28m5zeNI91MXAmcCR9Q6OCLuI63VuzDXswio9KTTeOCm0gNXNdp9EunaLAI2KdRzC/AL4K6871qqL+JgZmYt0GtDNvpibrOkQRGxPL/HOwM4JiLmtrreepztbGbWdbVCNnr7Pd++ZnwOCHkdcEUndLxmZtZ8vXbk25P6W26z4yXNzLquz8ZLtjIqMpe/oaR78tq5u0oak+8vnxoRI8v+HOR4STMza0Sfm3aWtGZEvNyk4j4ELCpFVkq6CTg6P6zVNo6XNDPrW3rdyFfS13PM4x2khCtyVOOPJM0GTpL0oTxaXZSjHNfJxy2VdHbefq+kd+btwyTdluMWp0p6q6SRwNnAAZLmSzqdFJzxE0nnNNBOx0uamVlFvarzlbQ96d3VkcA+wA6F3WvnufXzgQnAwTlacU3gC4XjluXt5wGlG5k/Jj3gtDVwJXBuRMwHTgMm5mnlbwKzgUMj4pQGmut4STMzq6hXdb7ArsD1EfFiDqyYXNhXeo/33aSHnx7J368Aip3LVYW/SwEd7yO9+wopirEYU9ldjpc0M7OK+tI93781eFxU+dxOjpc0M+vHetvIdwZwoNISgOsDH6twzMPAsNL9XOAzwO2F/QcX/r4rf76TVaMYZzahrY6XNDOzinrVyDci5kqaCCwgTdfeV+GYlyQdCUyStGY+5qLCIW/MkY8rgEPythOAyyWdAjwDHNmE5o4DLst1vUgD8ZKSSvGST1M/XvLJfN+3FC/5DOme9KB83EnALyR9DfhVoZ5bJG1OipcEWA4cRrqeZmbWBv0qZEPSUmBUvtfacRwvaWbWdzhesvdwvKSZWT/QrzrfiBjWjHJaFS8ZEZ9enfPNzKx36PPTzpLGAcsj4vstKn9D0j3XtYETgX8DzgD+WOld3Pzw1ckRUe2BKnLAx8YR8Zv8fX9gi4g4U9KBwCMR8UAX2zk919vl+eN2ZDtX48xnM+ut+my2c3flB7GapRRBuW1EzASOIkVQ1gvBqKUUIgJAREyOiDPz1wOBLVajbDMz62F9svPtRRGUO+YIyHmS7pT0bklrk0bOB+cyD5Y0VtJ5knYG9gfOyfuG5981Kpc3JD9URn4d62pJD0q6HhhYqHfPXO9cSZNKrx2ZmVl79LnOt5dFUD4E7BoR2+ZyvhsR/ygrs5TcRUTcSUr1OiXv+22Nsr8AvBgRmwOnA9vn6zME+Abw4YjYLrf3y+UnO9vZzKx1+lznS++KoBxMeh/5fuCHwHuaUGbJbqTVj4iIhaT3hwF2Ik1bz5I0n/T+8dvKT3a2s5lZ6/Srp53pvAjKbwHT8l
rAw4Dp3SjjZV79R1R5vGQlAm6NiEPqHmlmZi3RFzvfGcAESd8j/b6PAeU5xv+KoIyI/6NyBOWZVI6g/BnNi6AcDDyRP48tbC+PkKTGvqWkKeV7gdGF7TOATwO3SdoS2Dpvvxs4v/TbJa0HbFKYBXgNZzubmTVXn5t2zsEUpQjKG6kSQUmKkJwkaRHwCpUjKE8C/j1vOwE4Mm//TN63us4GvidpHqv+Q2gasEXpgauyc64GTskPaQ0Hvg98IZcxpHDchcAgSQ+SHuCaAxARz5A6+qvyb7mLtLKRmZm1SZ9/z7erOj2Csic4XtLMrOv8nq+ZmVkH6Yv3fFdLp0dQmplZ7+dpZ6urJ+MlW8GRlWbWDp52bjJJl+bVh6rtHyfp5BbVPVbSea0o28zM2sPTzt0QEZ/r6TY0m6Q1I+Llnm6HmVl/4JFvDTnP+SFJV+aM5GslrVuWp/zRnJG8QNLUCmUcLenGnLW8vLB9tKQJ+fMESRflOMdHJFVd8SjbWNJNkh6VdHahzENyJvX9ks4qbK9X7z2k156K7Xa8pJlZi3jkW9+7gaMiYpaky4AvlnYoLSd4CbBbRCyRtEHxREnHAx8BDoyIFZJq1TMM2BEYDkzLIRgvVTl2JLAtsAJ4WNKPgZWkB7y2B/4K3CLpwIj4ZZ3ftymwc0SsLG6MiPHAeEj3fOuUYWZmXeCRb32PR8Ss/PnnrJrpvBMwIyKWAETEXwr7Dgf2BkZHxIoG6rkmIl6JiEeBx6gdfDE1IpblzvkBUjbzDsD0iHgmTx9fyap51dVMKu94zcystTzyra981NfoKHARaYS6KbCkwrnlOcxdqafYma+k/n/HWvXWzbt2vKSZWXN55FvfWyWVVjb6NHBHYd/dwG6S3g5QNu08D/g8MFnSxnnb05I2l7QGUP6+7xhJa+TIyHeQ8qe74l7gA3lN3wHAIbyaV12rXjMzazN3vvU9DByXM5LfSMpMBv6Vk3wMcJ2kBby6ZGFp/x3AycCUvI7uqcANpEUaniqr5/ekDvRG4Nga93srioincvnTSLnWcyLiV3l3rXrNzKzNHLJRQ17m74aI2LLF9UzI9Vzbynq6y9nOZmZd55ANMzOzDuIHrmqIiKVAS0e9uZ6x5ducDW1m1nd52rkbJC2PiEH5QapzI2J0hWOmAydHRFvna4v1SvoN8OmIeK7KsV8CxkfEi7XK7GvZztZ3ObfbOomnnVskIp6s1PF2iojYp1rHm30JWLdd7TEzs6Tfdb6SDpN0r6T5ki6WNKBG/OJGkq7P0ZELJO1cVtYwSffnzwMlXZ1jKK8HBhaO21PSXTmGcpKkQXn7aZLuy3GQ45UjsHJ85Vm5nY9I2rXG76lV79L86tF6kqbk33C/pIMlnQhsTErTmtaES2tmZg3qV52vpM2Bg4FdImIkKaDi0BqnnAvcHhHbANsBi2sc+wXgxYjYHDidFPNIfsXoG8CHI2I7YDbw5XzOeRGxQ36aeiBQzHReMyJ2JI1OT+9qvWU+CjwZEdvkum6KiHOBJ4E9ImKP8hOc7Wxm1jr97YGrD5E6p/vyIHMg8Kcax3+QFBNJjmCs1QvtRuqsiYiFkhbm7TsBWwCzcp1rA3flfXtI+ipp6ncDUuf+67zvuvz3HFLuc1frLVoE/HdebOGGiJhZozxyWc52NjNrkf7W+Qq4IiL+Y5WN0lcKX8vjF5tR560RcUhZna8DLgBGRcTjksaV1V2KkGwkPrKmiHhE0nbAPsC3JU2NiDMaPd/xkmZmzdWvpp2BqcBoSW+GFAcp6W1Uj1+cSprWJd8bHlyj7Bmk+EkkbQlsnbffDewi6Z1533qS3sWrHe2z+R5wdx/cqlbvv+Snsl+MiJ8D55Cm0AFeANbvZr1mZtZN/arzjYgHSPdfb8nTs7cCQ6kev3gSaWp4EWn6d4saxV8IDMoxlGfk40sRlGOBq3KddwGb5aeQLwHuB24G7uvmz6pYb5mtgHslzSfdF/523j4euMkPXJmZtZff87
W6HC9pZtZ1fs/XzMysg/S3B656LcdNmpn1HZ52trocL2lm/dHqxpV62jkrJlL1YBs2ltSypQMl7S7phvx5f0mn1jh2pKR9WtUWMzOrrF91vu0kqeKUfjvzoCNickScWeOQkaR3f83MrI36Y+c7QNIlkhZLuiVnIx+dM5YXSPofSetKGizpd/nd39L7uY9LWkvScEk3SZojaaakzfIxEyRdJOke4GxJH8gZ0vMlzZO0flke9FhJ1+WyHpV0dqmR1fKgK5H0UUkPSZoLfLywfayk8/LnMTnXeYGkGZLWJr2adHBu38FlZTpe0sysRfpj5zsCOD8i3gM8B3wCuC5nLG8DPAgcFRHLgPnAB/J5+wE3R8Q/Se/HnhAR2wMnk5KqSjYFdo6IL+d9x+Uc6V2Bv1doz0hS3vRWpI7wLXXyoFeRk7IuAT5Gis78tyq/+zRgr/wb94+If+RtEyNiZERMLB4cEeMjYlREjBqwbq1sETMz66r++LTzkoiYnz+XcpO3lPRt4A3AIFLoBcBEUsc4DfgUcEEege4MTMpZzQDrFMqflHOgAWYBP5B0JamD/0PhnJKpuaNH0gPA23I7quVBl9ss/6ZHcxk/B46pcNwsYIKka3g1N9rMzHpAf+x8VxQ+ryQtrjABODAiFkgaC+ye908GvitpA9Ko8jZgPeC5PJqt5G+lDxFxpqQppPuqs/LrQi/Vac+aVMmDXh0Rcayk9wL7AnMkVVr9qCJnO5uZNVd/nHauZH3gKUlrUVhiMCKWk2If/x9pNaCVEfE8sETSGAAl21QqVNLwiFgUEWflcjZrsD3V8qAreQgYJml4/l6xw85tuSciTgOeAd6Cs53NzHqEO9/kv4B7SFOzD5Xtmwgclv8uORQ4StIC0jKAB1Qp90v5IaeFwD+BGxtpTLU86CrHvkSaZp6SH7iqtkTiOZIW5Ye97gQWkKbTt6j0wJWZmbWOQzasLkkvAA/3dDt6gSHAsz3diA7na1Sfr1FjesN1eltEbFhpR3+852td93C1lBZ7laTZvk61+RrV52vUmN5+ndz59iKSrgfeXrb5axFxc6XjzcysM7nz7UW8iIKZWd/gB66sEeN7ugG9hK9Tfb5G9fkaNaZXXyc/cGVmZtZmHvmamZm1mTtfMzOzNnPna/+SV0d6WNL/VVoHWNI6kibm/fdIGtb+VvasBq7RbnklqpcltWXpyE7UwHX6sqQHJC2UNFXS23qinT2pgWt0bA7GmS/pDklb9EQ7e1K9a1Q47hOSQlKvefXIna8BIGkAcD6wN2lRh0Mq/B/7UcBfI+KdwA+Bs9rbyp7V4DX6PSmd7BftbV3naPA6zQNGRcTWwLXA2fQjDV6jX0TEVjlH/mzgB21uZo9q8BohaX3gJFJKYa/hztdKdgT+LyIey8sNXs1rYzMPAK7In68FPqQKyzT1YXWvUUQsjYiFwCs90cAO0ch1mhYRL+avd5OW4uxPGrlGzxe+rgf0t6djG/n/SQDfIg0Eyhet6WjufK1kE+Dxwvc/5G0Vj4mIl4FlwJva0rrO0Mg1sq5fp6NoMPe8D2noGkk6TtJvSSPfE9vUtk5R9xpJ2g54S0RMaWfDmsGdr5n1GEmHAaOAc3q6LZ0oIs6PiOHA14Bv9HR7OomkNUhT8V/p6bZ0hztfK3mCtMxgyaZ5W8VjJK0JDAb+3JbWdYZGrpE1eJ0kfRj4OrB/RKwo39/HdfV/S1cDB7a0RZ2n3jVaH9gSmC5pKbATMLm3PHTlztdK7gNGSHq7pLWBTwGTy46ZDByRP48Gbov+ldLSyDWyBq6TpG2Bi0kdb7VlMPuyRq7RiMLXfYFH29i+TlDzGkXEsogYEhHDImIY6dmB/SNids80t2vc+Rrwr3u4xwM3Aw8C10TEYklnSNo/H/YT4E2S/g/4MlD10f++qJFrJGkHSX8AxgAXS1rccy3uGQ3+b+kcYBAwKb9K06/+EdPgNTpe0mJJ80n/93ZEleL6pAavUa/leEkzM7M288
jXzMyszdz5mpmZtZk7XzMzszZz52tmZtZm7nzNzMzazJ2vmZlZm7nzNTMza7P/Dzx31kjLjOIjAAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light",
"tags": []
},
"output_type": "display_data"
}
],
"source": [
"# Pair each importance score exposed by the fitted model `regr` with its feature name.\n",
"# NOTE(review): `regr` and `X` come from earlier cells; presumably the fitted random forest and its training frame - confirm.\n",
"feat_importances = pd.Series(regr.feature_importances_, index=X.columns)\n",
"# nlargest over the full length keeps every feature, ordered from most to least important.\n",
"ax = feat_importances.nlargest(len(X.columns)).plot(kind='barh')\n",
"# Label the chart so it stands alone when the notebook is skimmed; the trailing ';' hides the repr.\n",
"ax.set(title='Feature importances', xlabel='Importance', ylabel='Feature');"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "GmusCp9pdxw_"
},
"outputs": [],
"source": []
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"collapsed_sections": [],
"name": "rf_old_data_50est.ipynb",
"provenance": [],
"toc_visible": true
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.10"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment