Commit c753cddd authored by Shubham V's avatar Shubham V

state comp pynb added

parent a6c067da
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "7caa9b98",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import os\n",
"import glob\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "697e552d",
"metadata": {},
"outputs": [],
"source": [
"def read_all_csv():\n",
" path = './'\n",
" extension = 'csv'\n",
" os.chdir(path)\n",
" result = glob.glob('SRC_R*.{}'.format(extension))\n",
" df=None\n",
" for file in result:\n",
" if(df is None):\n",
" df=pd.read_csv(file);\n",
" else:\n",
" df1=pd.read_csv(file);\n",
" df=pd.concat([df,df1], ignore_index=True)\n",
" df_master=pd.read_csv(\"SRC_MasterData.csv\")\n",
" df_master=df_master[['State_Code','State_Name','Total_Population']]\n",
" final_df = pd.merge(df, df_master, on='State_Code', how='inner')\n",
" final_df['total']=final_df[['Primary_Only','Primary_with_Upper_Primary','Primary with_Upper_Primary_Sec_H.Sec','Upper_Primary_Only','Upper_Primary_with_Sec_H.Sec','Primary_with_Upper_Primary_Sec','Upper_Primary_with _Sec']].sum(axis=1)\n",
" final_df['total_by_pop']=final_df['total']/final_df['Total_Population']\n",
"\n",
" final_df=final_df[[\"Year\",\"State_Name\",\"State_Code\",\"Main_Dimension\",\"Sub_Dimension\",\"Total_Population\",'total','total_by_pop']]\n",
"\n",
"\n",
" return final_df"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "1365fc80",
"metadata": {},
"outputs": [],
"source": [
"final_df=read_all_csv();"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "52390a07",
"metadata": {},
"outputs": [],
"source": [
"# !ls"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "e3ed6eed",
"metadata": {},
"outputs": [],
"source": [
"# df_master=pd.read_csv(\"src_m.csv\")\n",
"# df_master=df_master[['State_Code','State_Name','Total_Population']]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "2f8e77a6",
"metadata": {},
"outputs": [],
"source": [
"# # inner join\n",
"# final_df = pd.merge(df, df_master, on='State_Code', how='inner')\n",
"# final_df['total']=final_df[['Primary_Only','Primary_with_Upper_Primary','Primary with_Upper_Primary_Sec_H.Sec','Upper_Primary_Only','Upper_Primary_with_Sec_H.Sec','Primary_with_Upper_Primary_Sec','Upper_Primary_with _Sec']].sum(axis=1)\n",
"# final_df['total_by_pop']=final_df['total']/final_df['Total_Population']\n",
"\n",
"# final_df=final_df[[\"Year\",\"State_Name\",\"State_Code\",\"Main_Dimension\",\"Sub_Dimension\",\"Total_Population\",'total','total_by_pop']]\n",
"# final_df.columns"
]
},
{
"cell_type": "markdown",
"id": "e3ef6188",
"metadata": {},
"source": [
"## Schools"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "6674a309",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_55575/3820314783.py:9: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
" df_sum = df_fil.groupby(['State_Name','Year'],as_index = False).sum().sort_values(\"total_by_pop\",ascending=False)\n"
]
},
{
"data": {
"text/plain": [
"{'top3': ['MEGHALAYA', 'ARUNACHAL PRADESH', 'MIZORAM'],\n",
" 'bottom3': ['WEST BENGAL', 'CHANDIGARH', 'DELHI']}"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dict_summary={\"top3\":[],\"bottom3\":[]}\n",
"fig_size_w=16\n",
"fig_size_h=13\n",
"\n",
"\n",
"def state_total_schools(main_df,year,file_name):\n",
" df_fil=main_df.loc[(main_df['Main_Dimension'] == \"School by Category\")]\n",
" df_fil=df_fil.loc[df_fil['Year'] == year]\n",
" df_sum = df_fil.groupby(['State_Name','Year'],as_index = False).sum().sort_values(\"total_by_pop\",ascending=False)\n",
" fig = plt.figure(figsize=(fig_size_w,fig_size_h))\n",
" plt.xticks(rotation=90, ha=\"right\",fontsize=8)\n",
" plt.bar(df_sum['State_Name'],df_sum['total_by_pop'], align='center')\n",
" plt.ylabel('Schools/population',fontsize=12)\n",
" list_states=list(df_sum['State_Name'])\n",
" dict_summary['top3']=[list_states[0],list_states[1],list_states[2]];\n",
" dict_summary['bottom3']=[list_states[-1],list_states[-2],list_states[-3]];\n",
" plt.savefig(file_name);\n",
" plt.close(fig);\n",
" return dict_summary\n",
"\n",
"state_total_schools(final_df,2013,\"foo.jpeg\")"
]
},
{
"cell_type": "markdown",
"id": "c01f8ffa",
"metadata": {},
"source": [
"## Schools by cat"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "880715a8",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_55575/79703392.py:4: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
" df_sum = df_fil.groupby(['State_Name','Year'],as_index = False).sum().sort_values(\"total_by_pop\",ascending=False)\n"
]
},
{
"data": {
"text/plain": [
"{'top3': ['MEGHALAYA', 'MIZORAM', 'SIKKIM'],\n",
" 'bottom3': ['LAKSHADWEEP', 'WEST BENGAL', 'BIHAR']}"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"def state_total_schools_by_cat(main_df,year,type_school,file_name):\n",
" df_fil=main_df.loc[(main_df['Main_Dimension'] == \"School by Category\") &(main_df['Sub_Dimension'] == type_school)]\n",
" df_fil=df_fil.loc[df_fil['Year'] == year]\n",
" df_sum = df_fil.groupby(['State_Name','Year'],as_index = False).sum().sort_values(\"total_by_pop\",ascending=False)\n",
"\n",
" fig = plt.figure(figsize=(fig_size_w,fig_size_h))\n",
" plt.xticks(rotation=90, ha=\"right\")\n",
" plt.bar(df_sum['State_Name'],df_sum['total_by_pop'], align='center')\n",
" plt.ylabel('Schools/population')\n",
" list_states=list(df_sum['State_Name'])\n",
" dict_summary['top3']=[list_states[0],list_states[1],list_states[2]];\n",
" dict_summary['bottom3']=[list_states[-1],list_states[-2],list_states[-3]];\n",
" plt.savefig(file_name);\n",
" plt.close(fig);\n",
" return dict_summary\n",
"\n",
"\n",
"state_total_schools_by_cat(final_df,2014,\"Private\",\"foo\")\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "cd1994d7",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Year</th>\n",
" <th>State_Name</th>\n",
" <th>State_Code</th>\n",
" <th>Main_Dimension</th>\n",
" <th>Sub_Dimension</th>\n",
" <th>Total_Population</th>\n",
" <th>total</th>\n",
" <th>total_by_pop</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2013</td>\n",
" <td>JAMMU &amp; KASHMIR</td>\n",
" <td>1</td>\n",
" <td>School by Category</td>\n",
" <td>Government</td>\n",
" <td>12549</td>\n",
" <td>23234.0</td>\n",
" <td>1.851462</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2013</td>\n",
" <td>JAMMU &amp; KASHMIR</td>\n",
" <td>1</td>\n",
" <td>School by Category</td>\n",
" <td>Private</td>\n",
" <td>12549</td>\n",
" <td>5073.0</td>\n",
" <td>0.404255</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2013</td>\n",
" <td>JAMMU &amp; KASHMIR</td>\n",
" <td>1</td>\n",
" <td>School by Category</td>\n",
" <td>Madrasas &amp; Unrecognised</td>\n",
" <td>12549</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2013</td>\n",
" <td>JAMMU &amp; KASHMIR</td>\n",
" <td>1</td>\n",
" <td>Rural Schools</td>\n",
" <td>Government</td>\n",
" <td>12549</td>\n",
" <td>21708.0</td>\n",
" <td>1.729859</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2013</td>\n",
" <td>JAMMU &amp; KASHMIR</td>\n",
" <td>1</td>\n",
" <td>Rural Schools</td>\n",
" <td>Private</td>\n",
" <td>12549</td>\n",
" <td>3584.0</td>\n",
" <td>0.285600</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11469</th>\n",
" <td>2014</td>\n",
" <td>TELANGANA</td>\n",
" <td>36</td>\n",
" <td>Teachers by Educational Qualification</td>\n",
" <td>Graduate</td>\n",
" <td>35004</td>\n",
" <td>128603.0</td>\n",
" <td>3.673952</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11470</th>\n",
" <td>2014</td>\n",
" <td>TELANGANA</td>\n",
" <td>36</td>\n",
" <td>Teachers by Educational Qualification</td>\n",
" <td>Post Graduate</td>\n",
" <td>35004</td>\n",
" <td>78034.0</td>\n",
" <td>2.229288</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11471</th>\n",
" <td>2014</td>\n",
" <td>TELANGANA</td>\n",
" <td>36</td>\n",
" <td>Teachers by Educational Qualification</td>\n",
" <td>M.Phil / Ph.D</td>\n",
" <td>35004</td>\n",
" <td>1869.0</td>\n",
" <td>0.053394</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11472</th>\n",
" <td>2014</td>\n",
" <td>TELANGANA</td>\n",
" <td>36</td>\n",
" <td>Teachers by Educational Qualification</td>\n",
" <td>Post Doctorate</td>\n",
" <td>35004</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11473</th>\n",
" <td>2014</td>\n",
" <td>TELANGANA</td>\n",
" <td>36</td>\n",
" <td>Teachers by Educational Qualification</td>\n",
" <td>No Response</td>\n",
" <td>35004</td>\n",
" <td>6.0</td>\n",
" <td>0.000171</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>11474 rows × 8 columns</p>\n",
"</div>"
],
"text/plain": [
" Year State_Name State_Code \\\n",
"0 2013 JAMMU & KASHMIR 1 \n",
"1 2013 JAMMU & KASHMIR 1 \n",
"2 2013 JAMMU & KASHMIR 1 \n",
"3 2013 JAMMU & KASHMIR 1 \n",
"4 2013 JAMMU & KASHMIR 1 \n",
"... ... ... ... \n",
"11469 2014 TELANGANA 36 \n",
"11470 2014 TELANGANA 36 \n",
"11471 2014 TELANGANA 36 \n",
"11472 2014 TELANGANA 36 \n",
"11473 2014 TELANGANA 36 \n",
"\n",
" Main_Dimension Sub_Dimension \\\n",
"0 School by Category Government \n",
"1 School by Category Private \n",
"2 School by Category Madrasas & Unrecognised \n",
"3 Rural Schools Government \n",
"4 Rural Schools Private \n",
"... ... ... \n",
"11469 Teachers by Educational Qualification Graduate \n",
"11470 Teachers by Educational Qualification Post Graduate \n",
"11471 Teachers by Educational Qualification M.Phil / Ph.D \n",
"11472 Teachers by Educational Qualification Post Doctorate \n",
"11473 Teachers by Educational Qualification No Response \n",
"\n",
" Total_Population total total_by_pop \n",
"0 12549 23234.0 1.851462 \n",
"1 12549 5073.0 0.404255 \n",
"2 12549 0.0 0.000000 \n",
"3 12549 21708.0 1.729859 \n",
"4 12549 3584.0 0.285600 \n",
"... ... ... ... \n",
"11469 35004 128603.0 3.673952 \n",
"11470 35004 78034.0 2.229288 \n",
"11471 35004 1869.0 0.053394 \n",
"11472 35004 0.0 0.000000 \n",
"11473 35004 6.0 0.000171 \n",
"\n",
"[11474 rows x 8 columns]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"final_df"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "bea15640",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_55575/1282086341.py:3: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
" df_sum = df_fil.groupby(['State_Name','Year'],as_index = False).sum().sort_values(\"total_by_pop\",ascending=False)\n"
]
},
{
"data": {
"text/plain": [
"{'top3': ['MEGHALAYA', 'MEGHALAYA', 'MEGHALAYA'],\n",
" 'bottom3': ['DELHI', 'DELHI', 'DELHI']}"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def state_total_schools_by_rural(main_df,year,file_name):\n",
" df_fil=main_df.loc[(main_df['Main_Dimension'] == \"Rural Schools\")]\n",
" df_sum = df_fil.groupby(['State_Name','Year'],as_index = False).sum().sort_values(\"total_by_pop\",ascending=False)\n",
"\n",
" fig = plt.figure(figsize=(fig_size_w,fig_size_h))\n",
" plt.xticks(rotation=90, ha=\"right\")\n",
" plt.bar(df_sum['State_Name'],df_sum['total_by_pop'], align='center')\n",
" plt.ylabel('Schools/population') \n",
" list_states=list(df_sum['State_Name'])\n",
" dict_summary['top3']=[list_states[0],list_states[1],list_states[2]];\n",
" dict_summary['bottom3']=[list_states[-1],list_states[-2],list_states[-3]];\n",
" plt.savefig(file_name);\n",
" plt.close(fig);\n",
" return dict_summary\n",
"\n",
"\n",
"state_total_schools_by_rural(final_df,2014,\"foo\")\n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "ea502ae3",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_55575/2771868427.py:4: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
" df_sum = df_fil.groupby(['State_Name','Year'],as_index = False).sum().sort_values(\"total_by_pop\",ascending=False)\n"
]
},
{
"data": {
"text/plain": [
"{'top3': ['SIKKIM', 'MIZORAM', 'ARUNACHAL PRADESH'],\n",
" 'bottom3': ['WEST BENGAL', 'ANDHRA PRADESH', 'BIHAR']}"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"def state_total_teachers(main_df,year,file_name):\n",
" df_fil=main_df.loc[main_df['Main_Dimension'] == \"Teachers by School Category\"]\n",
" df_fil=df_fil.loc[df_fil['Year'] == year]\n",
" df_sum = df_fil.groupby(['State_Name','Year'],as_index = False).sum().sort_values(\"total_by_pop\",ascending=False)\n",
"\n",
" fig = plt.figure(figsize=(fig_size_w,fig_size_h))\n",
" plt.xticks(rotation=90, ha=\"right\")\n",
" plt.bar(df_sum['State_Name'],df_sum['total_by_pop'], align='center')\n",
" plt.ylabel('Teachers/population')\n",
" list_states=list(df_sum['State_Name'])\n",
" dict_summary['top3']=[list_states[0],list_states[1],list_states[2]];\n",
" dict_summary['bottom3']=[list_states[-1],list_states[-2],list_states[-3]];\n",
" plt.savefig(file_name);\n",
" plt.close(fig);\n",
" return dict_summary\n",
"\n",
"\n",
"state_total_teachers(final_df,2016,\"foo\")\n"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "981d83de",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_55575/2603743918.py:4: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
" df_sum = df_fil.groupby(['State_Name','Year'],as_index = False).sum().sort_values(\"total_by_pop\",ascending=False)\n"
]
},
{
"data": {
"text/plain": [
"{'top3': ['MEGHALAYA', 'MIZORAM', 'SIKKIM'],\n",
" 'bottom3': ['LAKSHADWEEP', 'WEST BENGAL', 'BIHAR']}"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"def state_total_teachers_by_cat(main_df,year,type_school,file_name):\n",
" df_fil=main_df.loc[(main_df['Main_Dimension'] == \"Teachers by School Category\") &(main_df['Sub_Dimension'] == type_school)]\n",
" df_fil=df_fil.loc[df_fil['Year'] == year]\n",
" df_sum = df_fil.groupby(['State_Name','Year'],as_index = False).sum().sort_values(\"total_by_pop\",ascending=False)\n",
" fig = plt.figure(figsize=(fig_size_w,fig_size_h))\n",
" plt.xticks(rotation=90, ha=\"right\")\n",
" plt.bar(df_sum['State_Name'],df_sum['total_by_pop'], align='center')\n",
" plt.ylabel('Teachers/population')\n",
" list_states=list(df_sum['State_Name'])\n",
" dict_summary['top3']=[list_states[0],list_states[1],list_states[2]];\n",
" dict_summary['bottom3']=[list_states[-1],list_states[-2],list_states[-3]];\n",
" plt.savefig(file_name);\n",
" plt.close(fig);\n",
" return dict_summary\n",
"\n",
"\n",
"state_total_teachers_by_cat(final_df,2014,\"Private\",\"foo\")\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "3e87176b",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_55575/1555212325.py:5: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
" df_sum = df_fil.groupby(['State_Name','Year'],as_index = False).sum().sort_values(\"total_by_pop\",ascending=False)\n"
]
},
{
"data": {
"text/plain": [
"{'top3': ['SIKKIM', 'NAGALAND', 'MANIPUR'],\n",
" 'bottom3': ['WEST BENGAL', 'KARNATAKA', 'BIHAR']}"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"def state_total_teachers_by_qual(main_df,year,qual_type,file_name):\n",
" df_fil=main_df.loc[main_df['Main_Dimension'] == \"Teachers by Educational Qualification\"]\n",
" df_fil=df_fil.loc[main_df['Sub_Dimension'] == qual_type]\n",
" df_fil=df_fil.loc[df_fil['Year'] == year]\n",
" df_sum = df_fil.groupby(['State_Name','Year'],as_index = False).sum().sort_values(\"total_by_pop\",ascending=False)\n",
"\n",
" fig = plt.figure(figsize=(fig_size_w,fig_size_h))\n",
" plt.xticks(rotation=90, ha=\"right\")\n",
" plt.bar(df_sum['State_Name'],df_sum['total_by_pop'], align='center')\n",
" plt.ylabel('Teachers/population')\n",
" list_states=list(df_sum['State_Name'])\n",
" dict_summary['top3']=[list_states[0],list_states[1],list_states[2]];\n",
" dict_summary['bottom3']=[list_states[-1],list_states[-2],list_states[-3]];\n",
" plt.savefig(file_name);\n",
" plt.close(fig);\n",
" return dict_summary\n",
"\n",
"state_total_teachers_by_qual(final_df,2014,\"Graduate\",\"foo\")\n"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "6baccf51",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_55575/3696739622.py:4: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
" df_sum = df_fil.groupby(['State_Name','Year'],as_index = False).sum().sort_values(\"total_by_pop\",ascending=False)\n"
]
},
{
"data": {
"text/plain": [
"{'top3': ['MIZORAM', 'ARUNACHAL PRADESH', 'MEGHALAYA'],\n",
" 'bottom3': ['WEST BENGAL', 'CHANDIGARH', 'DELHI']}"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"def state_school_fac(main_df,year,facility,file_name):\n",
" df_fil=main_df.loc[(main_df['Main_Dimension'] == \"School Facilities\") & (main_df['Sub_Dimension'] == facility)]\n",
" df_fil=df_fil.loc[df_fil['Year'] == year]\n",
" df_sum = df_fil.groupby(['State_Name','Year'],as_index = False).sum().sort_values(\"total_by_pop\",ascending=False)\n",
"\n",
" fig = plt.figure(figsize=(fig_size_w,fig_size_h))\n",
" plt.xticks(rotation=90, ha=\"right\")\n",
" plt.bar(df_sum['State_Name'],df_sum['total_by_pop'], align='center')\n",
" plt.ylabel('Schools/population')\n",
" list_states=list(df_sum['State_Name'])\n",
" dict_summary['top3']=[list_states[0],list_states[1],list_states[2]];\n",
" dict_summary['bottom3']=[list_states[-1],list_states[-2],list_states[-3]];\n",
" plt.savefig(file_name);\n",
" plt.close(fig);\n",
" return dict_summary\n",
"\n",
"\n",
"state_school_fac(final_df,2014,\"Schools with Girls' Toilet\",\"foo\")\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "f42ca283",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_55575/3535025734.py:5: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
" df_sum = df_fil.groupby(['State_Name','Year'],as_index = False).sum().sort_values(\"total_by_pop\",ascending=False)\n"
]
},
{
"data": {
"text/plain": [
"{'top3': ['SIKKIM', 'HIMACHAL PRADESH', 'A&N ISLANDS'],\n",
" 'bottom3': ['WEST BENGAL', 'BIHAR', 'MANIPUR']}"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"def state_total_class(main_df,year,cond,file_name):\n",
"# main_df['total']=main_df[['Primary_Only','Primary_with_Upper_Primary','Primary with_Upper_Primary_Sec_H.Sec','Upper_Primary_Only','Upper_Primary_with_Sec_H.Sec','Primary_with_Upper_Primary_Sec','Upper_Primary_with _Sec']].sum(axis=1)\n",
" df_fil=main_df.loc[(main_df['Main_Dimension'] == \"Total Classrooms\") &(main_df['Sub_Dimension'] == cond)]\n",
" df_fil=df_fil.loc[df_fil['Year'] == year]\n",
" df_sum = df_fil.groupby(['State_Name','Year'],as_index = False).sum().sort_values(\"total_by_pop\",ascending=False)\n",
"\n",
" fig = plt.figure(figsize=(fig_size_w,fig_size_h))\n",
" plt.xticks(rotation=90, ha=\"right\")\n",
" plt.bar(df_sum['State_Name'],df_sum['total_by_pop'], align='center')\n",
" plt.ylabel('Classrooms/population')\n",
" list_states=list(df_sum['State_Name'])\n",
" dict_summary['top3']=[list_states[0],list_states[1],list_states[2]];\n",
" dict_summary['bottom3']=[list_states[-1],list_states[-2],list_states[-3]];\n",
" plt.savefig(file_name);\n",
" plt.close(fig);\n",
" return dict_summary\n",
"\n",
"\n",
"state_total_class(final_df,2010,\"Good Condition\",\"foo\")\n"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "90789f2c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'top3': ['ARUNACHAL PRADESH', 'MEGHALAYA', 'MIZORAM'],\n",
" 'bottom3': ['WEST BENGAL', 'KERALA', 'DAMAN & DIU']}"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"def state_total_enrollments(main_df,year,file_name):\n",
" df_fil=main_df.loc[(main_df['Main_Dimension'] == \"Elementary Enrolment by School Category\")]\n",
" df_fil=df_fil.loc[df_fil['Year'] == year]\n",
" df_fil=df_fil[['Year','State_Name','total','total_by_pop']]\n",
" df_sum = df_fil.groupby(['State_Name','Year'],as_index = False).sum().sort_values(\"total_by_pop\",ascending=False)\n",
" fig = plt.figure(figsize=(fig_size_w,fig_size_h))\n",
" plt.xticks(rotation=90, ha=\"right\")\n",
" plt.bar(df_sum['State_Name'],df_sum['total_by_pop'], align='center')\n",
" plt.ylabel('Enrollements/population')\n",
" list_states=list(df_sum['State_Name'])\n",
" dict_summary['top3']=[list_states[0],list_states[1],list_states[2]];\n",
" dict_summary['bottom3']=[list_states[-1],list_states[-2],list_states[-3]];\n",
" plt.savefig(file_name);\n",
" plt.close(fig);\n",
" return dict_summary\n",
"\n",
"# plt.savefig(\"images/\"+filename+\".png\");\n",
"# plt.close();\n",
"# plt.close(fig);\n",
"\n",
"state_total_enrollments(final_df,2010,\"enrol\")\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "06025324",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'top3': ['NAGALAND', 'MEGHALAYA', 'MANIPUR'],\n",
" 'bottom3': ['LAKSHADWEEP', 'BIHAR', 'WEST BENGAL']}"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"def state_total_enrollments_by_cat(main_df,year,cat_type,file_name):\n",
" df_fil=main_df.loc[(main_df['Main_Dimension'] == \"Elementary Enrolment by School Category\") &((main_df['Sub_Dimension'] == cat_type))]\n",
" df_fil=df_fil.loc[df_fil['Year'] == year]\n",
" df_fil=df_fil[['Year','State_Name','total','total_by_pop']]\n",
" df_sum = df_fil.groupby(['State_Name','Year'],as_index = False).sum().sort_values(\"total_by_pop\",ascending=False)\n",
" fig = plt.figure(figsize=(fig_size_w,fig_size_h))\n",
" plt.xticks(rotation=90, ha=\"right\")\n",
" plt.bar(df_sum['State_Name'],df_sum['total_by_pop'], align='center')\n",
" plt.ylabel('Enrollements/Population')\n",
" list_states=list(df_sum['State_Name'])\n",
" dict_summary['top3']=[list_states[0],list_states[1],list_states[2]];\n",
" dict_summary['bottom3']=[list_states[-1],list_states[-2],list_states[-3]];\n",
" plt.savefig(file_name);\n",
" plt.close(fig);\n",
" return dict_summary\n",
"\n",
"\n",
"state_total_enrollments_by_cat(final_df,2010,\"Private\",\"enrol\")\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "3e7fe81e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'top3': ['ARUNACHAL PRADESH', 'MEGHALAYA', 'MIZORAM'],\n",
" 'bottom3': ['WEST BENGAL', 'KERALA', 'DAMAN & DIU']}"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"def state_total_enrollments_by_rural(main_df,year,filename):\n",
" df_fil=main_df.loc[(main_df['Main_Dimension'] == \"Rural Schools\")]\n",
" df_fil=df_fil.loc[df_fil['Year'] == year]\n",
" df_fil=df_fil[['Year','State_Name','total','total_by_pop']]\n",
" df_sum = df_fil.groupby(['State_Name','Year'],as_index = False).sum().sort_values(\"total_by_pop\",ascending=False)\n",
" fig = plt.figure(figsize=(fig_size_w,fig_size_h))\n",
" plt.xticks(rotation=90, ha=\"right\")\n",
" plt.bar(df_sum['State_Name'],df_sum['total_by_pop'], align='center')\n",
" plt.ylabel('Enrollements/population')\n",
" plt.savefig(file_name);\n",
" plt.close(fig);\n",
"\n",
" return df_sum\n",
"\n",
"state_total_enrollments(final_df,2010,\"enrol\")\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "aa04749f",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "cb78eff7",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "543f8bce",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "79058fce",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python (edu)",
"language": "python",
"name": "edu"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment