Commit 5e924f63 authored by Manas Gabani's avatar Manas Gabani

trend analysis merged

parents b002a154 ddfd3f13
{
"cells": [
{
"cell_type": "markdown",
"id": "043bb863",
"metadata": {},
"source": [
"## Schools"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "89cc3670",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Education_in_India.ipynb SRC_RawData_2007_08.csv SRC_RawData_2015_16.csv\r\n",
" images\t\t\t SRC_RawData_2008_09.csv SRC_RawData_2016_17.csv\r\n",
" __MACOSX\t\t SRC_RawData_2009_10.csv 'state comp.ipynb'\r\n",
" SL_project_app\t\t SRC_RawData_2010_11.csv 'state comp_orig.ipynb'\r\n",
" SL_project_app.zip\t SRC_RawData_2011_12.csv trend.ipynb\r\n",
" SRC_MasterData.csv\t SRC_RawData_2012_13.csv trend_orig.ipynb\r\n",
" src_m.csv\t\t SRC_RawData_2013_14.csv tr.jpeg\r\n",
" SRC_RawData_2006_07.csv SRC_RawData_2014_15.csv\r\n"
]
}
],
"source": [
"!ls"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "a717c019",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import os\n",
"import glob\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "1c7ab413",
"metadata": {},
"outputs": [],
"source": [
"def read_all_csv():\n",
"    \"\"\"Load every yearly ``SRC_R*.csv`` file from the working directory and join it with ``src_m.csv``.\n",
"\n",
"    The yearly files are stacked, inner-joined with the master file on\n",
"    ``State_Code``, and two derived columns are added: ``total`` (row-wise sum\n",
"    of the seven school-type columns) and ``total_by_pop`` (``total`` divided\n",
"    by ``Total_Population``).\n",
"\n",
"    Returns:\n",
"        DataFrame with the columns Year, State_Name, State_Code, Main_Dimension,\n",
"        Sub_Dimension, Total_Population, total and total_by_pop.\n",
"\n",
"    Raises:\n",
"        FileNotFoundError: if no ``SRC_R*.csv`` file exists in the working directory.\n",
"    \"\"\"\n",
"    school_type_cols = ['Primary_Only','Primary_with_Upper_Primary','Primary with_Upper_Primary_Sec_H.Sec','Upper_Primary_Only','Upper_Primary_with_Sec_H.Sec','Primary_with_Upper_Primary_Sec','Upper_Primary_with _Sec']\n",
"    output_cols = [\"Year\",\"State_Name\",\"State_Code\",\"Main_Dimension\",\"Sub_Dimension\",\"Total_Population\",'total','total_by_pop']\n",
"\n",
"    # Sorted so the row order does not depend on the filesystem's listing order.\n",
"    raw_files = sorted(glob.glob('SRC_R*.csv'))\n",
"    if not raw_files:\n",
"        raise FileNotFoundError('No SRC_R*.csv files found in ' + os.getcwd())\n",
"\n",
"    # Read everything first and concatenate once instead of growing a frame per file.\n",
"    raw_df = pd.concat([pd.read_csv(raw_file) for raw_file in raw_files], ignore_index=True)\n",
"\n",
"    df_master = pd.read_csv(\"src_m.csv\")[['State_Code','State_Name','Total_Population']]\n",
"    final_df = pd.merge(raw_df, df_master, on='State_Code', how='inner')\n",
"    final_df['total'] = final_df[school_type_cols].sum(axis=1)\n",
"    final_df['total_by_pop'] = final_df['total'] / final_df['Total_Population']\n",
"\n",
"    return final_df[output_cols]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "7ad2672a",
"metadata": {},
"outputs": [],
"source": [
"df=read_all_csv();"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "2bb248aa",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Year</th>\n",
" <th>State_Name</th>\n",
" <th>State_Code</th>\n",
" <th>Main_Dimension</th>\n",
" <th>Sub_Dimension</th>\n",
" <th>Total_Population</th>\n",
" <th>total</th>\n",
" <th>total_by_pop</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2013</td>\n",
" <td>JAMMU &amp; KASHMIR</td>\n",
" <td>1</td>\n",
" <td>School by Category</td>\n",
" <td>Government</td>\n",
" <td>12549</td>\n",
" <td>23234.0</td>\n",
" <td>1.851462</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2013</td>\n",
" <td>JAMMU &amp; KASHMIR</td>\n",
" <td>1</td>\n",
" <td>School by Category</td>\n",
" <td>Private</td>\n",
" <td>12549</td>\n",
" <td>5073.0</td>\n",
" <td>0.404255</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2013</td>\n",
" <td>JAMMU &amp; KASHMIR</td>\n",
" <td>1</td>\n",
" <td>School by Category</td>\n",
" <td>Madrasas &amp; Unrecognised</td>\n",
" <td>12549</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2013</td>\n",
" <td>JAMMU &amp; KASHMIR</td>\n",
" <td>1</td>\n",
" <td>Rural Schools</td>\n",
" <td>Government</td>\n",
" <td>12549</td>\n",
" <td>21708.0</td>\n",
" <td>1.729859</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2013</td>\n",
" <td>JAMMU &amp; KASHMIR</td>\n",
" <td>1</td>\n",
" <td>Rural Schools</td>\n",
" <td>Private</td>\n",
" <td>12549</td>\n",
" <td>3584.0</td>\n",
" <td>0.285600</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11469</th>\n",
" <td>2014</td>\n",
" <td>TELANGANA</td>\n",
" <td>36</td>\n",
" <td>Teachers by Educational Qualification</td>\n",
" <td>Graduate</td>\n",
" <td>35004</td>\n",
" <td>128603.0</td>\n",
" <td>3.673952</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11470</th>\n",
" <td>2014</td>\n",
" <td>TELANGANA</td>\n",
" <td>36</td>\n",
" <td>Teachers by Educational Qualification</td>\n",
" <td>Post Graduate</td>\n",
" <td>35004</td>\n",
" <td>78034.0</td>\n",
" <td>2.229288</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11471</th>\n",
" <td>2014</td>\n",
" <td>TELANGANA</td>\n",
" <td>36</td>\n",
" <td>Teachers by Educational Qualification</td>\n",
" <td>M.Phil / Ph.D</td>\n",
" <td>35004</td>\n",
" <td>1869.0</td>\n",
" <td>0.053394</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11472</th>\n",
" <td>2014</td>\n",
" <td>TELANGANA</td>\n",
" <td>36</td>\n",
" <td>Teachers by Educational Qualification</td>\n",
" <td>Post Doctorate</td>\n",
" <td>35004</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11473</th>\n",
" <td>2014</td>\n",
" <td>TELANGANA</td>\n",
" <td>36</td>\n",
" <td>Teachers by Educational Qualification</td>\n",
" <td>No Response</td>\n",
" <td>35004</td>\n",
" <td>6.0</td>\n",
" <td>0.000171</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>11474 rows × 8 columns</p>\n",
"</div>"
],
"text/plain": [
" Year State_Name State_Code \\\n",
"0 2013 JAMMU & KASHMIR 1 \n",
"1 2013 JAMMU & KASHMIR 1 \n",
"2 2013 JAMMU & KASHMIR 1 \n",
"3 2013 JAMMU & KASHMIR 1 \n",
"4 2013 JAMMU & KASHMIR 1 \n",
"... ... ... ... \n",
"11469 2014 TELANGANA 36 \n",
"11470 2014 TELANGANA 36 \n",
"11471 2014 TELANGANA 36 \n",
"11472 2014 TELANGANA 36 \n",
"11473 2014 TELANGANA 36 \n",
"\n",
" Main_Dimension Sub_Dimension \\\n",
"0 School by Category Government \n",
"1 School by Category Private \n",
"2 School by Category Madrasas & Unrecognised \n",
"3 Rural Schools Government \n",
"4 Rural Schools Private \n",
"... ... ... \n",
"11469 Teachers by Educational Qualification Graduate \n",
"11470 Teachers by Educational Qualification Post Graduate \n",
"11471 Teachers by Educational Qualification M.Phil / Ph.D \n",
"11472 Teachers by Educational Qualification Post Doctorate \n",
"11473 Teachers by Educational Qualification No Response \n",
"\n",
" Total_Population total total_by_pop \n",
"0 12549 23234.0 1.851462 \n",
"1 12549 5073.0 0.404255 \n",
"2 12549 0.0 0.000000 \n",
"3 12549 21708.0 1.729859 \n",
"4 12549 3584.0 0.285600 \n",
"... ... ... ... \n",
"11469 35004 128603.0 3.673952 \n",
"11470 35004 78034.0 2.229288 \n",
"11471 35004 1869.0 0.053394 \n",
"11472 35004 0.0 0.000000 \n",
"11473 35004 6.0 0.000171 \n",
"\n",
"[11474 rows x 8 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "markdown",
"id": "002e2939",
"metadata": {},
"source": [
"## Total schools"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "77729b50",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_61569/1542341961.py:9: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
" df_sum = df_fil.groupby(['State_Code','Year'],as_index = False).sum()\n"
]
}
],
"source": [
"# Shared figure size (inches) and default output image for the trend charts.\n",
"fig_size_tr_w=10\n",
"fig_size_tr_h=8\n",
"file_name=\"tr.jpeg\"\n",
"def total_schools(main_df,state_code,file_name):\n",
"    \"\"\"Save a line chart of the yearly number of schools in one state.\n",
"\n",
"    Args:\n",
"        main_df: frame with Main_Dimension, State_Code, Year and total columns.\n",
"        state_code: State_Code value selecting the state to plot.\n",
"        file_name: path handed to ``savefig`` for the rendered chart.\n",
"    \"\"\"\n",
"    selected = main_df.loc[\n",
"        (main_df['Main_Dimension'] == \"School by Category\")\n",
"        & (main_df['State_Code'] == state_code)\n",
"    ]\n",
"    # Aggregate only 'total': a bare .sum() also sweeps in the text columns,\n",
"    # which is what triggers pandas' numeric_only FutureWarning.\n",
"    yearly = selected.groupby('Year', as_index=False)['total'].sum()\n",
"\n",
"    fig, ax = plt.subplots(figsize=(fig_size_tr_w, fig_size_tr_h))\n",
"    try:\n",
"        ax.plot(yearly['Year'], yearly['total'])\n",
"        ax.set_xticks(np.arange(2006, 2017, 1.0))\n",
"        ax.set_ylabel('Number of schools')\n",
"        ax.set_xlabel('Year')\n",
"        fig.savefig(file_name)\n",
"    finally:\n",
"        # Always release the figure, even if plotting or saving raises.\n",
"        plt.close(fig)\n",
"\n",
"total_schools(df,2,file_name)"
]
},
{
"cell_type": "markdown",
"id": "3e5f22ff",
"metadata": {},
"source": [
"## Total schools by category"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "8bb4d97b",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_61569/2271420292.py:6: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
" df_sum = df_fil.groupby(['Sub_Dimension','State_Code','Year'],as_index = False).sum()\n"
]
}
],
"source": [
"def total_schools_by_cat(main_df,state_code,file_name):\n",
"    \"\"\"Save a chart of yearly school counts in one state, one line per management category.\n",
"\n",
"    Args:\n",
"        main_df: frame with Main_Dimension, Sub_Dimension, State_Code, Year and total columns.\n",
"        state_code: State_Code value selecting the state to plot.\n",
"        file_name: path handed to ``savefig`` for the rendered chart.\n",
"    \"\"\"\n",
"    # (Sub_Dimension value, line colour, legend label)\n",
"    series_specs = (\n",
"        (\"Government\", 'r', 'Government'),\n",
"        (\"Private\", 'g', 'Private'),\n",
"        (\"Madrasas & Unrecognised\", 'b', 'Madrasas'),\n",
"    )\n",
"    selected = main_df.loc[\n",
"        (main_df['Main_Dimension'] == \"School by Category\")\n",
"        & (main_df['State_Code'] == state_code)\n",
"    ]\n",
"    # Aggregate only 'total': a bare .sum() also sweeps in the text columns,\n",
"    # which is what triggers pandas' numeric_only FutureWarning.\n",
"    per_category = selected.groupby(['Sub_Dimension', 'Year'], as_index=False)['total'].sum()\n",
"\n",
"    fig, ax = plt.subplots(figsize=(fig_size_tr_w, fig_size_tr_h))\n",
"    try:\n",
"        for sub_dimension, colour, label in series_specs:\n",
"            series = per_category.loc[per_category['Sub_Dimension'] == sub_dimension]\n",
"            ax.plot(series['Year'], series['total'], color=colour, label=label)\n",
"        ax.legend()\n",
"        ax.set_xticks(np.arange(2006, 2017, 1.0))\n",
"        ax.set_ylabel('Number of schools')\n",
"        ax.set_xlabel('Year')\n",
"        fig.savefig(file_name)\n",
"    finally:\n",
"        # Always release the figure, even if plotting or saving raises.\n",
"        plt.close(fig)\n",
"\n",
"total_schools_by_cat(df,1,file_name)"
]
},
{
"cell_type": "markdown",
"id": "d5c76a13",
"metadata": {},
"source": [
"## Total schools by gender"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "ec0feb43",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_61569/1307616346.py:7: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
" df_sum = df_fil.groupby(['Sub_Dimension','State_Code','Year'],as_index = False).sum()\n"
]
}
],
"source": [
"def total_schools_by_gender(main_df,state_code,file_name):\n",
"    \"\"\"Save a chart of yearly boys-only and girls-only school counts in one state.\n",
"\n",
"    Args:\n",
"        main_df: frame with Main_Dimension, Sub_Dimension, State_Code, Year and total columns.\n",
"        state_code: State_Code value selecting the state to plot.\n",
"        file_name: path handed to ``savefig`` for the rendered chart.\n",
"    \"\"\"\n",
"    # (Sub_Dimension value, line colour, legend label)\n",
"    series_specs = (\n",
"        (\"Boys Only\", 'r', 'Boys Only'),\n",
"        (\"Girls Only\", 'g', 'Girls Only'),\n",
"    )\n",
"    selected = main_df.loc[\n",
"        (main_df['Main_Dimension'] == \"School by Gender\")\n",
"        & (main_df['State_Code'] == state_code)\n",
"    ]\n",
"    # Aggregate only 'total': a bare .sum() also sweeps in the text columns,\n",
"    # which is what triggers pandas' numeric_only FutureWarning.\n",
"    per_gender = selected.groupby(['Sub_Dimension', 'Year'], as_index=False)['total'].sum()\n",
"\n",
"    fig, ax = plt.subplots(figsize=(fig_size_tr_w, fig_size_tr_h))\n",
"    try:\n",
"        for sub_dimension, colour, label in series_specs:\n",
"            series = per_gender.loc[per_gender['Sub_Dimension'] == sub_dimension]\n",
"            ax.plot(series['Year'], series['total'], color=colour, label=label)\n",
"        ax.legend()\n",
"        # NOTE(review): ticks start at 2014 here, unlike the 2006-2016 range of the\n",
"        # other charts - presumably this dimension only exists in later years; confirm.\n",
"        ax.set_xticks(np.arange(2014, 2017, 1.0))\n",
"        ax.set_ylabel('Number of schools')\n",
"        ax.set_xlabel('Year')\n",
"        fig.savefig(file_name)\n",
"    finally:\n",
"        # Always release the figure, even if plotting or saving raises.\n",
"        plt.close(fig)\n",
"\n",
"total_schools_by_gender(df,1,file_name)"
]
},
{
"cell_type": "markdown",
"id": "70e8f3e4",
"metadata": {},
"source": [
"## Total rural schools by category"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "fe017c90",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_61569/2577340614.py:6: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
" df_sum = df_fil.groupby(['Sub_Dimension','State_Code','Year'],as_index = False).sum()\n"
]
}
],
"source": [
"def total_schools_by_rural(main_df,state_code,file_name):\n",
"    \"\"\"Save a chart of yearly rural school counts in one state, one line per management category.\n",
"\n",
"    Args:\n",
"        main_df: frame with Main_Dimension, Sub_Dimension, State_Code, Year and total columns.\n",
"        state_code: State_Code value selecting the state to plot.\n",
"        file_name: path handed to ``savefig`` for the rendered chart.\n",
"    \"\"\"\n",
"    # (Sub_Dimension value, line colour, legend label)\n",
"    series_specs = (\n",
"        (\"Government\", 'r', 'Government'),\n",
"        (\"Private\", 'g', 'Private'),\n",
"        (\"Madrasas & Unrecognised\", 'b', 'Madrasas'),\n",
"    )\n",
"    selected = main_df.loc[\n",
"        (main_df['Main_Dimension'] == \"Rural Schools\")\n",
"        & (main_df['State_Code'] == state_code)\n",
"    ]\n",
"    # Aggregate only 'total': a bare .sum() also sweeps in the text columns,\n",
"    # which is what triggers pandas' numeric_only FutureWarning.\n",
"    per_category = selected.groupby(['Sub_Dimension', 'Year'], as_index=False)['total'].sum()\n",
"\n",
"    fig, ax = plt.subplots(figsize=(fig_size_tr_w, fig_size_tr_h))\n",
"    try:\n",
"        for sub_dimension, colour, label in series_specs:\n",
"            series = per_category.loc[per_category['Sub_Dimension'] == sub_dimension]\n",
"            ax.plot(series['Year'], series['total'], color=colour, label=label)\n",
"        ax.legend()\n",
"        ax.set_xticks(np.arange(2006, 2017, 1.0))\n",
"        ax.set_ylabel('Number of schools')\n",
"        ax.set_xlabel('Year')\n",
"        fig.savefig(file_name)\n",
"    finally:\n",
"        # Always release the figure, even if plotting or saving raises.\n",
"        plt.close(fig)\n",
"\n",
"total_schools_by_rural(df,32,file_name)\n"
]
},
{
"cell_type": "markdown",
"id": "04eccc6d",
"metadata": {},
"source": [
"## Total enrollments"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "95a39e8b",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_61569/2425626090.py:6: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
" df_sum = df_fil.groupby(['State_Code','Year'],as_index = False).sum()\n"
]
}
],
"source": [
"def total_enrolment(main_df,state_code,file_name):\n",
"    \"\"\"Save a line chart of yearly elementary enrolment in one state.\n",
"\n",
"    Args:\n",
"        main_df: frame with Main_Dimension, State_Code, Year and total columns.\n",
"        state_code: State_Code value selecting the state to plot.\n",
"        file_name: path handed to ``savefig`` for the rendered chart.\n",
"    \"\"\"\n",
"    selected = main_df.loc[\n",
"        (main_df['Main_Dimension'] == \"Elementary Enrolment by School Category\")\n",
"        & (main_df['State_Code'] == state_code)\n",
"    ]\n",
"    # Aggregate only 'total': a bare .sum() also sweeps in the text columns,\n",
"    # which is what triggers pandas' numeric_only FutureWarning.\n",
"    yearly = selected.groupby('Year', as_index=False)['total'].sum()\n",
"\n",
"    fig, ax = plt.subplots(figsize=(fig_size_tr_w, fig_size_tr_h))\n",
"    try:\n",
"        ax.plot(yearly['Year'], yearly['total'])\n",
"        ax.set_xticks(np.arange(2006, 2017, 1.0))\n",
"        ax.set_ylabel('Enrollments')\n",
"        ax.set_xlabel('Year')\n",
"        fig.savefig(file_name)\n",
"    finally:\n",
"        # Always release the figure, even if plotting or saving raises.\n",
"        plt.close(fig)\n",
"\n",
"total_enrolment(df,7,file_name)\n"
]
},
{
"cell_type": "markdown",
"id": "13d9f81d",
"metadata": {},
"source": [
"## Total enrollments by category"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "b93efbb4",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_61569/3525156651.py:6: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
" df_sum = df_fil.groupby(['Sub_Dimension','State_Code','Year'],as_index = False).sum()\n"
]
}
],
"source": [
"def total_enrolment_cat(main_df,state_code,file_name):\n",
"    \"\"\"Save a chart of yearly elementary enrolment in one state, one line per management category.\n",
"\n",
"    Args:\n",
"        main_df: frame with Main_Dimension, Sub_Dimension, State_Code, Year and total columns.\n",
"        state_code: State_Code value selecting the state to plot.\n",
"        file_name: path handed to ``savefig`` for the rendered chart.\n",
"    \"\"\"\n",
"    # (Sub_Dimension value, line colour, legend label)\n",
"    series_specs = (\n",
"        (\"Government\", 'r', 'Government'),\n",
"        (\"Private\", 'g', 'Private'),\n",
"        (\"Madrasas & Unrecognised\", 'b', 'Madrasas'),\n",
"    )\n",
"    selected = main_df.loc[\n",
"        (main_df['Main_Dimension'] == \"Elementary Enrolment by School Category\")\n",
"        & (main_df['State_Code'] == state_code)\n",
"    ]\n",
"    # Aggregate only 'total': a bare .sum() also sweeps in the text columns,\n",
"    # which is what triggers pandas' numeric_only FutureWarning.\n",
"    per_category = selected.groupby(['Sub_Dimension', 'Year'], as_index=False)['total'].sum()\n",
"\n",
"    fig, ax = plt.subplots(figsize=(fig_size_tr_w, fig_size_tr_h))\n",
"    try:\n",
"        for sub_dimension, colour, label in series_specs:\n",
"            series = per_category.loc[per_category['Sub_Dimension'] == sub_dimension]\n",
"            ax.plot(series['Year'], series['total'], color=colour, label=label)\n",
"        ax.legend()\n",
"        ax.set_xticks(np.arange(2006, 2017, 1.0))\n",
"        # Same spelling as the other enrolment charts in this notebook.\n",
"        ax.set_ylabel('Enrollments')\n",
"        ax.set_xlabel('Year')\n",
"        fig.savefig(file_name)\n",
"    finally:\n",
"        # Always release the figure, even if plotting or saving raises.\n",
"        plt.close(fig)\n",
"\n",
"total_enrolment_cat(df,32,file_name)\n"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "33aa4a2f",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_61569/356455259.py:6: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
" df_sum = df_fil.groupby(['Sub_Dimension','State_Code','Year'],as_index = False).sum()\n"
]
}
],
"source": [
"def total_enrolment_rural(main_df,state_code,file_name):\n",
"    \"\"\"Save a chart of yearly rural elementary enrolment in one state, one line per management category.\n",
"\n",
"    Args:\n",
"        main_df: frame with Main_Dimension, Sub_Dimension, State_Code, Year and total columns.\n",
"        state_code: State_Code value selecting the state to plot.\n",
"        file_name: path handed to ``savefig`` for the rendered chart.\n",
"    \"\"\"\n",
"    # (Sub_Dimension value, line colour, legend label)\n",
"    series_specs = (\n",
"        (\"Government\", 'r', 'Government'),\n",
"        (\"Private\", 'g', 'Private'),\n",
"        (\"Madrasas & Unrecognised\", 'b', 'Madrasas'),\n",
"    )\n",
"    selected = main_df.loc[\n",
"        (main_df['Main_Dimension'] == \"Rural Elementary Enrolment\")\n",
"        & (main_df['State_Code'] == state_code)\n",
"    ]\n",
"    # Aggregate only 'total': a bare .sum() also sweeps in the text columns,\n",
"    # which is what triggers pandas' numeric_only FutureWarning.\n",
"    per_category = selected.groupby(['Sub_Dimension', 'Year'], as_index=False)['total'].sum()\n",
"\n",
"    fig, ax = plt.subplots(figsize=(fig_size_tr_w, fig_size_tr_h))\n",
"    try:\n",
"        for sub_dimension, colour, label in series_specs:\n",
"            series = per_category.loc[per_category['Sub_Dimension'] == sub_dimension]\n",
"            ax.plot(series['Year'], series['total'], color=colour, label=label)\n",
"        ax.legend()\n",
"        ax.set_xticks(np.arange(2006, 2017, 1.0))\n",
"        ax.set_ylabel('Enrollments')\n",
"        ax.set_xlabel('Year')\n",
"        fig.savefig(file_name)\n",
"    finally:\n",
"        # Always release the figure, even if plotting or saving raises.\n",
"        plt.close(fig)\n",
"\n",
"total_enrolment_rural(df,32,file_name)\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "3087862a",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_61569/2919692743.py:6: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
" df_sum = df_fil.groupby(['State_Code','Year'],as_index = False).sum()\n"
]
}
],
"source": [
"def total_teachers(main_df,state_code,file_name):\n",
"    \"\"\"Save a line chart of the yearly number of teachers in one state.\n",
"\n",
"    Args:\n",
"        main_df: frame with Main_Dimension, State_Code, Year and total columns.\n",
"        state_code: State_Code value selecting the state to plot.\n",
"        file_name: path handed to ``savefig`` for the rendered chart.\n",
"    \"\"\"\n",
"    selected = main_df.loc[\n",
"        (main_df['Main_Dimension'] == \"Teachers by School Category\")\n",
"        & (main_df['State_Code'] == state_code)\n",
"    ]\n",
"    # Aggregate only 'total': a bare .sum() also sweeps in the text columns,\n",
"    # which is what triggers pandas' numeric_only FutureWarning.\n",
"    yearly = selected.groupby('Year', as_index=False)['total'].sum()\n",
"\n",
"    fig, ax = plt.subplots(figsize=(fig_size_tr_w, fig_size_tr_h))\n",
"    try:\n",
"        ax.plot(yearly['Year'], yearly['total'])\n",
"        ax.set_xticks(np.arange(2006, 2017, 1.0))\n",
"        ax.set_ylabel('Number of Teachers')\n",
"        ax.set_xlabel('Year')\n",
"        fig.savefig(file_name)\n",
"    finally:\n",
"        # Always release the figure, even if plotting or saving raises.\n",
"        plt.close(fig)\n",
"\n",
"total_teachers(df,7,file_name)\n"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "fc3c9288",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_61569/3791890994.py:6: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
" df_sum = df_fil.groupby(['Sub_Dimension','State_Code','Year'],as_index = False).sum()\n"
]
}
],
"source": [
"def total_teachers_by_cat(main_df,state_code,file_name):\n",
"    \"\"\"Save a chart of yearly teacher counts in one state, one line per management category.\n",
"\n",
"    Args:\n",
"        main_df: frame with Main_Dimension, Sub_Dimension, State_Code, Year and total columns.\n",
"        state_code: State_Code value selecting the state to plot.\n",
"        file_name: path handed to ``savefig`` for the rendered chart.\n",
"    \"\"\"\n",
"    # (Sub_Dimension value, line colour, legend label)\n",
"    series_specs = (\n",
"        (\"Government\", 'r', 'Government'),\n",
"        (\"Private\", 'g', 'Private'),\n",
"        (\"Madrasas & Unrecognised\", 'b', 'Madrasas'),\n",
"    )\n",
"    selected = main_df.loc[\n",
"        (main_df['Main_Dimension'] == \"Teachers by School Category\")\n",
"        & (main_df['State_Code'] == state_code)\n",
"    ]\n",
"    # Aggregate only 'total': a bare .sum() also sweeps in the text columns,\n",
"    # which is what triggers pandas' numeric_only FutureWarning.\n",
"    per_category = selected.groupby(['Sub_Dimension', 'Year'], as_index=False)['total'].sum()\n",
"\n",
"    fig, ax = plt.subplots(figsize=(fig_size_tr_w, fig_size_tr_h))\n",
"    try:\n",
"        for sub_dimension, colour, label in series_specs:\n",
"            series = per_category.loc[per_category['Sub_Dimension'] == sub_dimension]\n",
"            ax.plot(series['Year'], series['total'], color=colour, label=label)\n",
"        ax.legend()\n",
"        ax.set_xticks(np.arange(2006, 2017, 1.0))\n",
"        ax.set_ylabel('Number of Teachers')\n",
"        ax.set_xlabel('Year')\n",
"        fig.savefig(file_name)\n",
"    finally:\n",
"        # Always release the figure, even if plotting or saving raises.\n",
"        plt.close(fig)\n",
"\n",
"total_teachers_by_cat(df,32,file_name)\n"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "5b191fa5",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_61569/3895896839.py:6: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
" df_sum = df_fil.groupby(['Sub_Dimension','State_Code','Year'],as_index = False).sum()\n"
]
}
],
"source": [
"def total_schools_by_teachers_qual(main_df,state_code,file_name):\n",
"    \"\"\"Save a chart of yearly teacher counts in one state, one line per educational qualification.\n",
"\n",
"    Args:\n",
"        main_df: frame with Main_Dimension, Sub_Dimension, State_Code, Year and total columns.\n",
"        state_code: State_Code value selecting the state to plot.\n",
"        file_name: path handed to ``savefig`` for the rendered chart.\n",
"    \"\"\"\n",
"    # (Sub_Dimension value, which doubles as the legend label, and its line colour)\n",
"    series_specs = (\n",
"        (\"Below Secondary\", 'r'),\n",
"        (\"Secondary\", 'g'),\n",
"        (\"Higher Secondary\", 'b'),\n",
"        (\"Graduate\", 'y'),\n",
"        (\"Post Graduate\", 'c'),\n",
"        (\"M.Phil / Ph.D\", 'm'),\n",
"    )\n",
"    selected = main_df.loc[\n",
"        (main_df['Main_Dimension'] == \"Teachers by Educational Qualification\")\n",
"        & (main_df['State_Code'] == state_code)\n",
"    ]\n",
"    # Aggregate only 'total': a bare .sum() also sweeps in the text columns,\n",
"    # which is what triggers pandas' numeric_only FutureWarning.\n",
"    per_qualification = selected.groupby(['Sub_Dimension', 'Year'], as_index=False)['total'].sum()\n",
"\n",
"    fig, ax = plt.subplots(figsize=(fig_size_tr_w, fig_size_tr_h))\n",
"    try:\n",
"        for qualification, colour in series_specs:\n",
"            series = per_qualification.loc[per_qualification['Sub_Dimension'] == qualification]\n",
"            ax.plot(series['Year'], series['total'], color=colour, label=qualification)\n",
"        ax.legend()\n",
"        ax.set_xticks(np.arange(2006, 2017, 1.0))\n",
"        ax.set_ylabel('Number of Teachers')\n",
"        ax.set_xlabel('Year')\n",
"        fig.savefig(file_name)\n",
"    finally:\n",
"        # Always release the figure, even if plotting or saving raises.\n",
"        plt.close(fig)\n",
"\n",
"total_schools_by_teachers_qual(df,32,file_name)\n"
]
},
{
"cell_type": "markdown",
"id": "13ed74af",
"metadata": {},
"source": [
"## Total classrooms"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "268a5f84",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_61569/1933095301.py:6: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
" df_sum = df_fil.groupby(['State_Code','Year'],as_index = False).sum()\n"
]
}
],
"source": [
"def total_classrooms_by_goodcond(main_df,state_code,file_name):\n",
"    \"\"\"Save a line chart of the yearly number of classrooms in good condition in one state.\n",
"\n",
"    Args:\n",
"        main_df: frame with Main_Dimension, Sub_Dimension, State_Code, Year and total columns.\n",
"        state_code: State_Code value selecting the state to plot.\n",
"        file_name: path handed to ``savefig`` for the rendered chart.\n",
"    \"\"\"\n",
"    selected = main_df.loc[\n",
"        (main_df['Main_Dimension'] == \"Total Classrooms\")\n",
"        & (main_df['State_Code'] == state_code)\n",
"        & (main_df['Sub_Dimension'] == \"Good Condition\")\n",
"    ]\n",
"    # Aggregate only 'total': a bare .sum() also sweeps in the text columns,\n",
"    # which is what triggers pandas' numeric_only FutureWarning.\n",
"    yearly = selected.groupby('Year', as_index=False)['total'].sum()\n",
"\n",
"    fig, ax = plt.subplots(figsize=(fig_size_tr_w, fig_size_tr_h))\n",
"    try:\n",
"        ax.plot(yearly['Year'], yearly['total'])\n",
"        ax.set_xticks(np.arange(2006, 2017, 1.0))\n",
"        ax.set_ylabel('Number of classrooms')\n",
"        ax.set_xlabel('Year')\n",
"        fig.savefig(file_name)\n",
"    finally:\n",
"        # Always release the figure, even if plotting or saving raises.\n",
"        plt.close(fig)\n",
"\n",
"total_classrooms_by_goodcond(df,7,file_name)\n"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "2c66710e",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_61569/4134182718.py:6: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
" df_sum = df_fil.groupby(['State_Code','Year'],as_index = False).sum()\n"
]
}
],
"source": [
"def total_classrooms_by_needminrepair(main_df,state_code,file_name):\n",
"    \"\"\"Save a line chart of the yearly number of classrooms needing minor repair in one state.\n",
"\n",
"    Args:\n",
"        main_df: frame with Main_Dimension, Sub_Dimension, State_Code, Year and total columns.\n",
"        state_code: State_Code value selecting the state to plot.\n",
"        file_name: path handed to ``savefig`` for the rendered chart.\n",
"    \"\"\"\n",
"    selected = main_df.loc[\n",
"        (main_df['Main_Dimension'] == \"Total Classrooms\")\n",
"        & (main_df['State_Code'] == state_code)\n",
"        & (main_df['Sub_Dimension'] == \"Need Minor Repair\")\n",
"    ]\n",
"    # Aggregate only 'total': a bare .sum() also sweeps in the text columns,\n",
"    # which is what triggers pandas' numeric_only FutureWarning.\n",
"    yearly = selected.groupby('Year', as_index=False)['total'].sum()\n",
"\n",
"    fig, ax = plt.subplots(figsize=(fig_size_tr_w, fig_size_tr_h))\n",
"    try:\n",
"        ax.plot(yearly['Year'], yearly['total'])\n",
"        ax.set_xticks(np.arange(2006, 2017, 1.0))\n",
"        ax.set_ylabel('Number of classrooms')\n",
"        ax.set_xlabel('Year')\n",
"        fig.savefig(file_name)\n",
"    finally:\n",
"        # Always release the figure, even if plotting or saving raises.\n",
"        plt.close(fig)\n",
"\n",
"total_classrooms_by_needminrepair(df,7,file_name)\n"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "a981a04b",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_61569/1010963564.py:6: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
" df_sum = df_fil.groupby(['State_Code','Year'],as_index = False).sum()\n"
]
}
],
"source": [
"def total_classrooms_by_needmajorrepair(main_df,state_code,file_name):\n",
"    \"\"\"Save a line chart of the yearly number of classrooms needing major repair in one state.\n",
"\n",
"    Args:\n",
"        main_df: frame with Main_Dimension, Sub_Dimension, State_Code, Year and total columns.\n",
"        state_code: State_Code value selecting the state to plot.\n",
"        file_name: path handed to ``savefig`` for the rendered chart.\n",
"    \"\"\"\n",
"    selected = main_df.loc[\n",
"        (main_df['Main_Dimension'] == \"Total Classrooms\")\n",
"        & (main_df['State_Code'] == state_code)\n",
"        & (main_df['Sub_Dimension'] == \"Need Major Repair\")\n",
"    ]\n",
"    # Aggregate only 'total': a bare .sum() also sweeps in the text columns,\n",
"    # which is what triggers pandas' numeric_only FutureWarning.\n",
"    yearly = selected.groupby('Year', as_index=False)['total'].sum()\n",
"\n",
"    fig, ax = plt.subplots(figsize=(fig_size_tr_w, fig_size_tr_h))\n",
"    try:\n",
"        ax.plot(yearly['Year'], yearly['total'])\n",
"        ax.set_xticks(np.arange(2006, 2017, 1.0))\n",
"        ax.set_ylabel('Number of classrooms')\n",
"        ax.set_xlabel('Year')\n",
"        fig.savefig(file_name)\n",
"    finally:\n",
"        # Always release the figure, even if plotting or saving raises.\n",
"        plt.close(fig)\n",
"\n",
"total_classrooms_by_needmajorrepair(df,7,file_name)\n"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "a5c3baee",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_61569/1869549921.py:6: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
" df_sum = df_fil.groupby(['State_Code','Year'],as_index = False).sum()\n"
]
}
],
"source": [
"def total_classrooms_by_needmajorrepair(main_df,state_code,cond,file_name=None):\n",
"    \"\"\"Save a line chart of the yearly number of classrooms in a given condition in one state.\n",
"\n",
"    This definition replaces the three-argument function of the same name\n",
"    defined in the previous cell. To keep calls written for that version\n",
"    working, ``file_name`` may be omitted: the third argument is then taken as\n",
"    the output path and the condition defaults to \"Need Major Repair\".\n",
"\n",
"    Args:\n",
"        main_df: frame with Main_Dimension, Sub_Dimension, State_Code, Year and total columns.\n",
"        state_code: State_Code value selecting the state to plot.\n",
"        cond: Sub_Dimension value to plot, e.g. \"Good Condition\" or \"Need Minor Repair\".\n",
"        file_name: path handed to ``savefig`` for the rendered chart.\n",
"    \"\"\"\n",
"    if file_name is None:\n",
"        # Legacy call shape (main_df, state_code, file_name).\n",
"        cond, file_name = \"Need Major Repair\", cond\n",
"\n",
"    selected = main_df.loc[\n",
"        (main_df['Main_Dimension'] == \"Total Classrooms\")\n",
"        & (main_df['State_Code'] == state_code)\n",
"        & (main_df['Sub_Dimension'] == cond)\n",
"    ]\n",
"    # Aggregate only 'total': a bare .sum() also sweeps in the text columns,\n",
"    # which is what triggers pandas' numeric_only FutureWarning.\n",
"    yearly = selected.groupby('Year', as_index=False)['total'].sum()\n",
"\n",
"    fig, ax = plt.subplots(figsize=(fig_size_tr_w, fig_size_tr_h))\n",
"    try:\n",
"        ax.plot(yearly['Year'], yearly['total'])\n",
"        ax.set_xticks(np.arange(2006, 2017, 1.0))\n",
"        ax.set_ylabel('Number of classrooms')\n",
"        ax.set_xlabel('Year')\n",
"        fig.savefig(file_name)\n",
"    finally:\n",
"        # Always release the figure, even if plotting or saving raises.\n",
"        plt.close(fig)\n",
"\n",
"total_classrooms_by_needmajorrepair(df,7,\"Need Major Repair\",file_name)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "efd2f8e0",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "3ba17847",
"metadata": {},
"source": [
"## School facilities"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "d0dd73f0",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_61569/2056682837.py:6: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
" df_sum = df_fil.groupby(['State_Code','Year'],as_index = False).sum()\n"
]
}
],
"source": [
"# state_code=8\n",
"def school_fac_singleclassschool(main_df,state_code,file_name):\n",
" fig = plt.figure(figsize=(fig_size_tr_w,fig_size_tr_h))\n",
" df_fil=main_df.loc[main_df['Main_Dimension'] == \"School Facilities\"]\n",
" df_fil=df_fil.loc[(df_fil['State_Code'] == state_code) & (df_fil['Sub_Dimension'] == \"Single Classroom Schools\")]\n",
" df_sum = df_fil.groupby(['State_Code','Year'],as_index = False).sum()\n",
" plt.plot(df_sum['Year'],df_sum['total'])\n",
" plt.xticks(np.arange(2006,2017, 1.0))\n",
" plt.ylabel('Number of schools')\n",
" plt.xlabel('Year')\n",
" plt.savefig(file_name);\n",
" plt.close(fig);\n",
"\n",
"school_fac_singleclassschool(df,7,file_name)\n"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "04045acd",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_61569/2401885379.py:6: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
" df_sum = df_fil.groupby(['State_Code','Year'],as_index = False).sum()\n"
]
}
],
"source": [
"# state_code=8\n",
"def school_fac_singleteacherchool(main_df,state_code,file_name):\n",
" fig = plt.figure(figsize=(fig_size_tr_w,fig_size_tr_h))\n",
" df_fil=main_df.loc[main_df['Main_Dimension'] == \"School Facilities\"]\n",
" df_fil=df_fil.loc[(df_fil['State_Code'] == state_code) & (df_fil['Sub_Dimension'] == \"Single Teacher Schools\")]\n",
" df_sum = df_fil.groupby(['State_Code','Year'],as_index = False).sum()\n",
" plt.plot(df_sum['Year'],df_sum['total'])\n",
" plt.xticks(np.arange(2006,2017, 1.0))\n",
" plt.ylabel('Number of schools')\n",
" plt.xlabel('Year')\n",
" plt.savefig(file_name);\n",
" plt.close(fig);\n",
"school_fac_singleteacherchool(df,7,file_name)\n"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "0915fade",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_61569/1816513266.py:6: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
" df_sum = df_fil.groupby(['State_Code','Year'],as_index = False).sum()\n"
]
}
],
"source": [
"# state_code=8\n",
"def school_fac_enrol_less_50(main_df,state_code,file_name):\n",
" fig = plt.figure(figsize=(fig_size_tr_w,fig_size_tr_h))\n",
" df_fil=main_df.loc[main_df['Main_Dimension'] == \"School Facilities\"]\n",
" df_fil=df_fil.loc[(df_fil['State_Code'] == state_code) & (df_fil['Sub_Dimension'] == \"Enrolment <= 50\")]\n",
" df_sum = df_fil.groupby(['State_Code','Year'],as_index = False).sum()\n",
" plt.plot(df_sum['Year'],df_sum['total'])\n",
" plt.xticks(np.arange(2006,2017, 1.0))\n",
" plt.ylabel('Number of schools')\n",
" plt.xlabel('Year')\n",
" plt.savefig(file_name);\n",
" plt.close(fig);\n",
"school_fac_enrol_less_50(df,28,file_name)\n"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "47b72d94",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_61569/2792766097.py:6: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
" df_sum = df_fil.groupby(['State_Code','Year'],as_index = False).sum()\n"
]
}
],
"source": [
"# Yearly trend of schools with a girls' toilet for one state; the sample call below uses state code 24.\n",
"def school_fac_girls_toi(main_df, state_code, file_name):\n",
"    \"\"\"Plot the per-year number of schools with a girls' toilet for one state and save it.\n",
"\n",
"    Args:\n",
"        main_df: DataFrame with 'Main_Dimension', 'Sub_Dimension', 'State_Code',\n",
"            'Year' and 'total' columns.\n",
"        state_code: Value matched against the 'State_Code' column.\n",
"        file_name: Destination handed to ``Figure.savefig``.\n",
"\n",
"    Reads the figure size from the notebook-level names ``fig_size_tr_w`` and\n",
"    ``fig_size_tr_h``, which must exist before this is called.\n",
"    \"\"\"\n",
"    selected = main_df.loc[\n",
"        (main_df['Main_Dimension'] == \"School Facilities\")\n",
"        & (main_df['State_Code'] == state_code)\n",
"        & (main_df['Sub_Dimension'] == \"Schools with Girls' Toilet\")\n",
"    ]\n",
"    # Aggregate only 'total': summing every column relied on the deprecated\n",
"    # numeric_only default (FutureWarning), which future pandas flips to False.\n",
"    yearly = selected.groupby(['State_Code', 'Year'], as_index=False)['total'].sum()\n",
"\n",
"    fig, ax = plt.subplots(figsize=(fig_size_tr_w, fig_size_tr_h))\n",
"    try:\n",
"        ax.plot(yearly['Year'], yearly['total'])\n",
"        ax.set_xticks(np.arange(2006, 2017, 1.0))\n",
"        ax.set_ylabel('Number of schools')\n",
"        ax.set_xlabel('Year')\n",
"        fig.savefig(file_name)\n",
"    finally:\n",
"        # Always release the figure, even when plotting or saving fails.\n",
"        plt.close(fig)\n",
"\n",
"\n",
"school_fac_girls_toi(df,24,file_name)\n"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "9c683e6c",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_61569/1076733821.py:6: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
" df_sum = df_fil.groupby(['State_Code','Year'],as_index = False).sum()\n"
]
}
],
"source": [
"# Yearly trend of schools with drinking water for one state; the sample call below uses state code 24.\n",
"def school_fac_drink_wat(main_df, state_code, file_name):\n",
"    \"\"\"Plot the per-year number of schools with drinking water for one state and save it.\n",
"\n",
"    Args:\n",
"        main_df: DataFrame with 'Main_Dimension', 'Sub_Dimension', 'State_Code',\n",
"            'Year' and 'total' columns.\n",
"        state_code: Value matched against the 'State_Code' column.\n",
"        file_name: Destination handed to ``Figure.savefig``.\n",
"\n",
"    Reads the figure size from the notebook-level names ``fig_size_tr_w`` and\n",
"    ``fig_size_tr_h``, which must exist before this is called.\n",
"    \"\"\"\n",
"    selected = main_df.loc[\n",
"        (main_df['Main_Dimension'] == \"School Facilities\")\n",
"        & (main_df['State_Code'] == state_code)\n",
"        & (main_df['Sub_Dimension'] == \"Schools with Drinking Water\")\n",
"    ]\n",
"    # Aggregate only 'total': summing every column relied on the deprecated\n",
"    # numeric_only default (FutureWarning), which future pandas flips to False.\n",
"    yearly = selected.groupby(['State_Code', 'Year'], as_index=False)['total'].sum()\n",
"\n",
"    fig, ax = plt.subplots(figsize=(fig_size_tr_w, fig_size_tr_h))\n",
"    try:\n",
"        ax.plot(yearly['Year'], yearly['total'])\n",
"        ax.set_xticks(np.arange(2006, 2017, 1.0))\n",
"        ax.set_ylabel('Number of schools')\n",
"        ax.set_xlabel('Year')\n",
"        fig.savefig(file_name)\n",
"    finally:\n",
"        # Always release the figure, even when plotting or saving fails.\n",
"        plt.close(fig)\n",
"\n",
"\n",
"school_fac_drink_wat(df,24,file_name)\n"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "7a48b297",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_61569/4137262132.py:6: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
" df_sum = df_fil.groupby(['State_Code','Year'],as_index = False).sum()\n"
]
}
],
"source": [
"# Yearly trend of schools with a ramp for one state; the sample call below uses state code 24.\n",
"def school_fac_ramp(main_df, state_code, file_name):\n",
"    \"\"\"Plot the per-year number of schools with a ramp for one state and save it.\n",
"\n",
"    Args:\n",
"        main_df: DataFrame with 'Main_Dimension', 'Sub_Dimension', 'State_Code',\n",
"            'Year' and 'total' columns.\n",
"        state_code: Value matched against the 'State_Code' column.\n",
"        file_name: Destination handed to ``Figure.savefig``.\n",
"\n",
"    Reads the figure size from the notebook-level names ``fig_size_tr_w`` and\n",
"    ``fig_size_tr_h``, which must exist before this is called.\n",
"    \"\"\"\n",
"    selected = main_df.loc[\n",
"        (main_df['Main_Dimension'] == \"School Facilities\")\n",
"        & (main_df['State_Code'] == state_code)\n",
"        & (main_df['Sub_Dimension'] == \"Schools with Ramp\")\n",
"    ]\n",
"    # Aggregate only 'total': summing every column relied on the deprecated\n",
"    # numeric_only default (FutureWarning), which future pandas flips to False.\n",
"    yearly = selected.groupby(['State_Code', 'Year'], as_index=False)['total'].sum()\n",
"\n",
"    fig, ax = plt.subplots(figsize=(fig_size_tr_w, fig_size_tr_h))\n",
"    try:\n",
"        ax.plot(yearly['Year'], yearly['total'])\n",
"        ax.set_xticks(np.arange(2006, 2017, 1.0))\n",
"        ax.set_ylabel('Number of schools')\n",
"        ax.set_xlabel('Year')\n",
"        fig.savefig(file_name)\n",
"    finally:\n",
"        # Always release the figure, even when plotting or saving fails.\n",
"        plt.close(fig)\n",
"\n",
"\n",
"school_fac_ramp(df,24,file_name)\n"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "045a02a2",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_61569/3013851162.py:6: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
" df_sum = df_fil.groupby(['State_Code','Year'],as_index = False).sum()\n"
]
}
],
"source": [
"# Yearly trend of any \"School Facilities\" sub-dimension for one state;\n",
"# the sample call below plots \"Schools with Ramp\" for state code 15.\n",
"def school_fac(main_df, state_code, fac, file_name):\n",
"    \"\"\"Plot the per-year school count for one facility sub-dimension in one state and save it.\n",
"\n",
"    Args:\n",
"        main_df: DataFrame with 'Main_Dimension', 'Sub_Dimension', 'State_Code',\n",
"            'Year' and 'total' columns.\n",
"        state_code: Value matched against the 'State_Code' column.\n",
"        fac: Value matched against the 'Sub_Dimension' column, e.g. \"Schools with Ramp\".\n",
"        file_name: Destination handed to ``Figure.savefig``.\n",
"\n",
"    Reads the figure size from the notebook-level names ``fig_size_tr_w`` and\n",
"    ``fig_size_tr_h``, which must exist before this is called.\n",
"    \"\"\"\n",
"    selected = main_df.loc[\n",
"        (main_df['Main_Dimension'] == \"School Facilities\")\n",
"        & (main_df['State_Code'] == state_code)\n",
"        & (main_df['Sub_Dimension'] == fac)\n",
"    ]\n",
"    # Aggregate only 'total': summing every column relied on the deprecated\n",
"    # numeric_only default (FutureWarning), which future pandas flips to False.\n",
"    yearly = selected.groupby(['State_Code', 'Year'], as_index=False)['total'].sum()\n",
"\n",
"    fig, ax = plt.subplots(figsize=(fig_size_tr_w, fig_size_tr_h))\n",
"    try:\n",
"        ax.plot(yearly['Year'], yearly['total'])\n",
"        ax.set_xticks(np.arange(2006, 2017, 1.0))\n",
"        ax.set_ylabel('Number of schools')\n",
"        ax.set_xlabel('Year')\n",
"        fig.savefig(file_name)\n",
"    finally:\n",
"        # Always release the figure, even when plotting or saving fails.\n",
"        plt.close(fig)\n",
"\n",
"\n",
"school_fac(df,15,\"Schools with Ramp\",file_name)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c8333eea",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "3bf66712",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python (edu)",
"language": "python",
"name": "edu"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment