state comp pynb added

c753cddd · Shubham V · a6c067da · c753cddd
Commit c753cddd authored Nov 06, 2022 by Shubham V
Hide whitespace changes
Inline Side-by-side

Showing with 890 additions and 0 deletions

test/state comp.ipynb test/state comp.ipynb +890 -0

No files found.
--- a/test/state comp.ipynb
+++ b/test/state comp.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "7caa9b98",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import os\n",
+    "import glob\n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "697e552d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def read_all_csv():\n",
+    "    \"\"\"Load all SRC_R*.csv files from the working directory and join them with SRC_MasterData.csv.\n",
+    "\n",
+    "    The per-file frames are concatenated, inner-joined with the master data on\n",
+    "    State_Code, and extended with two derived columns:\n",
+    "      * total        - row-wise sum of the seven school-category columns\n",
+    "      * total_by_pop - total divided by Total_Population\n",
+    "\n",
+    "    Returns:\n",
+    "        DataFrame with the columns Year, State_Name, State_Code, Main_Dimension,\n",
+    "        Sub_Dimension, Total_Population, total and total_by_pop.\n",
+    "\n",
+    "    Raises:\n",
+    "        FileNotFoundError: if no SRC_R*.csv file exists in the working directory.\n",
+    "    \"\"\"\n",
+    "    extension = 'csv'\n",
+    "    # Sort so the row order does not depend on the file-system listing order.\n",
+    "    report_files = sorted(glob.glob('SRC_R*.{}'.format(extension)))\n",
+    "    if not report_files:\n",
+    "        raise FileNotFoundError('No SRC_R*.{} files found in {}'.format(extension, os.getcwd()))\n",
+    "    # Read every file first and concatenate once instead of re-copying the frame per file.\n",
+    "    df = pd.concat([pd.read_csv(file) for file in report_files], ignore_index=True)\n",
+    "    df_master = pd.read_csv(\"SRC_MasterData.csv\")\n",
+    "    df_master = df_master[['State_Code','State_Name','Total_Population']]\n",
+    "    final_df = pd.merge(df, df_master, on='State_Code', how='inner')\n",
+    "    # Column names are kept exactly as spelled in the CSV headers (including the stray spaces).\n",
+    "    category_columns = ['Primary_Only','Primary_with_Upper_Primary','Primary with_Upper_Primary_Sec_H.Sec','Upper_Primary_Only','Upper_Primary_with_Sec_H.Sec','Primary_with_Upper_Primary_Sec','Upper_Primary_with _Sec']\n",
+    "    final_df['total'] = final_df[category_columns].sum(axis=1)\n",
+    "    final_df['total_by_pop'] = final_df['total']/final_df['Total_Population']\n",
+    "\n",
+    "    return final_df[[\"Year\",\"State_Name\",\"State_Code\",\"Main_Dimension\",\"Sub_Dimension\",\"Total_Population\",'total','total_by_pop']]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "1365fc80",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "final_df=read_all_csv();"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "52390a07",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# !ls"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "e3ed6eed",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# df_master=pd.read_csv(\"src_m.csv\")\n",
+    "# df_master=df_master[['State_Code','State_Name','Total_Population']]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "2f8e77a6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# # inner join\n",
+    "# final_df = pd.merge(df, df_master, on='State_Code', how='inner')\n",
+    "# final_df['total']=final_df[['Primary_Only','Primary_with_Upper_Primary','Primary with_Upper_Primary_Sec_H.Sec','Upper_Primary_Only','Upper_Primary_with_Sec_H.Sec','Primary_with_Upper_Primary_Sec','Upper_Primary_with _Sec']].sum(axis=1)\n",
+    "# final_df['total_by_pop']=final_df['total']/final_df['Total_Population']\n",
+    "\n",
+    "# final_df=final_df[[\"Year\",\"State_Name\",\"State_Code\",\"Main_Dimension\",\"Sub_Dimension\",\"Total_Population\",'total','total_by_pop']]\n",
+    "# final_df.columns"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e3ef6188",
+   "metadata": {},
+   "source": [
+    "## Schools"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "6674a309",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/tmp/ipykernel_55575/3820314783.py:9: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
+      "  df_sum = df_fil.groupby(['State_Name','Year'],as_index = False).sum().sort_values(\"total_by_pop\",ascending=False)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "{'top3': ['MEGHALAYA', 'ARUNACHAL PRADESH', 'MIZORAM'],\n",
+       " 'bottom3': ['WEST BENGAL', 'CHANDIGARH', 'DELHI']}"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "dict_summary={\"top3\":[],\"bottom3\":[]}\n",
+    "fig_size_w=16\n",
+    "fig_size_h=13\n",
+    "\n",
+    "\n",
+    "def state_total_schools(main_df,year,file_name):\n",
+    "    \"\"\"Rank states by schools per population for one year and save the ranking as a bar chart.\n",
+    "\n",
+    "    Args:\n",
+    "        main_df: frame with Main_Dimension, Year, State_Name and total_by_pop columns.\n",
+    "        year: value matched against the Year column.\n",
+    "        file_name: target passed to plt.savefig().\n",
+    "\n",
+    "    Returns:\n",
+    "        dict with 'top3' (largest summed total_by_pop first) and 'bottom3'\n",
+    "        (smallest first); a list is shorter than three when fewer states match.\n",
+    "        The module-level dict_summary is updated with the same lists.\n",
+    "    \"\"\"\n",
+    "    df_fil=main_df.loc[(main_df['Main_Dimension'] == \"School by Category\") & (main_df['Year'] == year)]\n",
+    "    # numeric_only=True keeps the text columns out of the sum (the pandas default is changing).\n",
+    "    df_sum = df_fil.groupby(['State_Name','Year'],as_index = False).sum(numeric_only=True).sort_values(\"total_by_pop\",ascending=False)\n",
+    "    fig = plt.figure(figsize=(fig_size_w,fig_size_h))\n",
+    "    plt.xticks(rotation=90, ha=\"right\",fontsize=8)\n",
+    "    plt.bar(df_sum['State_Name'],df_sum['total_by_pop'], align='center')\n",
+    "    plt.ylabel('Schools/population',fontsize=12)\n",
+    "    list_states=list(df_sum['State_Name'])\n",
+    "    # Slices instead of fixed indices: no IndexError when fewer than three states match.\n",
+    "    dict_summary['top3']=list_states[:3]\n",
+    "    dict_summary['bottom3']=list_states[::-1][:3]\n",
+    "    plt.savefig(file_name)\n",
+    "    plt.close(fig)\n",
+    "    # Return a copy so a later call cannot overwrite this result through the shared dict.\n",
+    "    return dict(dict_summary)\n",
+    "\n",
+    "state_total_schools(final_df,2013,\"foo.jpeg\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c01f8ffa",
+   "metadata": {},
+   "source": [
+    "## Schools by category"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "880715a8",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/tmp/ipykernel_55575/79703392.py:4: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
+      "  df_sum = df_fil.groupby(['State_Name','Year'],as_index = False).sum().sort_values(\"total_by_pop\",ascending=False)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "{'top3': ['MEGHALAYA', 'MIZORAM', 'SIKKIM'],\n",
+       " 'bottom3': ['LAKSHADWEEP', 'WEST BENGAL', 'BIHAR']}"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "\n",
+    "def state_total_schools_by_cat(main_df,year,type_school,file_name):\n",
+    "    \"\"\"Rank states by schools per population for one school category and year; save a bar chart.\n",
+    "\n",
+    "    Args:\n",
+    "        main_df: frame with Main_Dimension, Sub_Dimension, Year, State_Name and total_by_pop columns.\n",
+    "        year: value matched against the Year column.\n",
+    "        type_school: value matched against Sub_Dimension within \"School by Category\".\n",
+    "        file_name: target passed to plt.savefig().\n",
+    "\n",
+    "    Returns:\n",
+    "        dict with 'top3' (largest summed total_by_pop first) and 'bottom3'\n",
+    "        (smallest first); a list is shorter than three when fewer states match.\n",
+    "        The module-level dict_summary is updated with the same lists.\n",
+    "    \"\"\"\n",
+    "    df_fil=main_df.loc[(main_df['Main_Dimension'] == \"School by Category\") & (main_df['Sub_Dimension'] == type_school) & (main_df['Year'] == year)]\n",
+    "    # numeric_only=True keeps the text columns out of the sum (the pandas default is changing).\n",
+    "    df_sum = df_fil.groupby(['State_Name','Year'],as_index = False).sum(numeric_only=True).sort_values(\"total_by_pop\",ascending=False)\n",
+    "\n",
+    "    fig = plt.figure(figsize=(fig_size_w,fig_size_h))\n",
+    "    plt.xticks(rotation=90, ha=\"right\")\n",
+    "    plt.bar(df_sum['State_Name'],df_sum['total_by_pop'], align='center')\n",
+    "    plt.ylabel('Schools/population')\n",
+    "    list_states=list(df_sum['State_Name'])\n",
+    "    # Slices instead of fixed indices: no IndexError when fewer than three states match.\n",
+    "    dict_summary['top3']=list_states[:3]\n",
+    "    dict_summary['bottom3']=list_states[::-1][:3]\n",
+    "    plt.savefig(file_name)\n",
+    "    plt.close(fig)\n",
+    "    # Return a copy so a later call cannot overwrite this result through the shared dict.\n",
+    "    return dict(dict_summary)\n",
+    "\n",
+    "\n",
+    "state_total_schools_by_cat(final_df,2014,\"Private\",\"foo\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "cd1994d7",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Year</th>\n",
+       "      <th>State_Name</th>\n",
+       "      <th>State_Code</th>\n",
+       "      <th>Main_Dimension</th>\n",
+       "      <th>Sub_Dimension</th>\n",
+       "      <th>Total_Population</th>\n",
+       "      <th>total</th>\n",
+       "      <th>total_by_pop</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>2013</td>\n",
+       "      <td>JAMMU &amp; KASHMIR</td>\n",
+       "      <td>1</td>\n",
+       "      <td>School by Category</td>\n",
+       "      <td>Government</td>\n",
+       "      <td>12549</td>\n",
+       "      <td>23234.0</td>\n",
+       "      <td>1.851462</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2013</td>\n",
+       "      <td>JAMMU &amp; KASHMIR</td>\n",
+       "      <td>1</td>\n",
+       "      <td>School by Category</td>\n",
+       "      <td>Private</td>\n",
+       "      <td>12549</td>\n",
+       "      <td>5073.0</td>\n",
+       "      <td>0.404255</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2013</td>\n",
+       "      <td>JAMMU &amp; KASHMIR</td>\n",
+       "      <td>1</td>\n",
+       "      <td>School by Category</td>\n",
+       "      <td>Madrasas &amp; Unrecognised</td>\n",
+       "      <td>12549</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>2013</td>\n",
+       "      <td>JAMMU &amp; KASHMIR</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Rural Schools</td>\n",
+       "      <td>Government</td>\n",
+       "      <td>12549</td>\n",
+       "      <td>21708.0</td>\n",
+       "      <td>1.729859</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>2013</td>\n",
+       "      <td>JAMMU &amp; KASHMIR</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Rural Schools</td>\n",
+       "      <td>Private</td>\n",
+       "      <td>12549</td>\n",
+       "      <td>3584.0</td>\n",
+       "      <td>0.285600</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11469</th>\n",
+       "      <td>2014</td>\n",
+       "      <td>TELANGANA</td>\n",
+       "      <td>36</td>\n",
+       "      <td>Teachers by Educational Qualification</td>\n",
+       "      <td>Graduate</td>\n",
+       "      <td>35004</td>\n",
+       "      <td>128603.0</td>\n",
+       "      <td>3.673952</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11470</th>\n",
+       "      <td>2014</td>\n",
+       "      <td>TELANGANA</td>\n",
+       "      <td>36</td>\n",
+       "      <td>Teachers by Educational Qualification</td>\n",
+       "      <td>Post Graduate</td>\n",
+       "      <td>35004</td>\n",
+       "      <td>78034.0</td>\n",
+       "      <td>2.229288</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11471</th>\n",
+       "      <td>2014</td>\n",
+       "      <td>TELANGANA</td>\n",
+       "      <td>36</td>\n",
+       "      <td>Teachers by Educational Qualification</td>\n",
+       "      <td>M.Phil / Ph.D</td>\n",
+       "      <td>35004</td>\n",
+       "      <td>1869.0</td>\n",
+       "      <td>0.053394</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11472</th>\n",
+       "      <td>2014</td>\n",
+       "      <td>TELANGANA</td>\n",
+       "      <td>36</td>\n",
+       "      <td>Teachers by Educational Qualification</td>\n",
+       "      <td>Post Doctorate</td>\n",
+       "      <td>35004</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11473</th>\n",
+       "      <td>2014</td>\n",
+       "      <td>TELANGANA</td>\n",
+       "      <td>36</td>\n",
+       "      <td>Teachers by Educational Qualification</td>\n",
+       "      <td>No Response</td>\n",
+       "      <td>35004</td>\n",
+       "      <td>6.0</td>\n",
+       "      <td>0.000171</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>11474 rows × 8 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       Year       State_Name  State_Code  \\\n",
+       "0      2013  JAMMU & KASHMIR           1   \n",
+       "1      2013  JAMMU & KASHMIR           1   \n",
+       "2      2013  JAMMU & KASHMIR           1   \n",
+       "3      2013  JAMMU & KASHMIR           1   \n",
+       "4      2013  JAMMU & KASHMIR           1   \n",
+       "...     ...              ...         ...   \n",
+       "11469  2014        TELANGANA          36   \n",
+       "11470  2014        TELANGANA          36   \n",
+       "11471  2014        TELANGANA          36   \n",
+       "11472  2014        TELANGANA          36   \n",
+       "11473  2014        TELANGANA          36   \n",
+       "\n",
+       "                              Main_Dimension            Sub_Dimension  \\\n",
+       "0                         School by Category               Government   \n",
+       "1                         School by Category                  Private   \n",
+       "2                         School by Category  Madrasas & Unrecognised   \n",
+       "3                              Rural Schools               Government   \n",
+       "4                              Rural Schools                  Private   \n",
+       "...                                      ...                      ...   \n",
+       "11469  Teachers by Educational Qualification                 Graduate   \n",
+       "11470  Teachers by Educational Qualification            Post Graduate   \n",
+       "11471  Teachers by Educational Qualification            M.Phil / Ph.D   \n",
+       "11472  Teachers by Educational Qualification           Post Doctorate   \n",
+       "11473  Teachers by Educational Qualification              No Response   \n",
+       "\n",
+       "       Total_Population     total  total_by_pop  \n",
+       "0                 12549   23234.0      1.851462  \n",
+       "1                 12549    5073.0      0.404255  \n",
+       "2                 12549       0.0      0.000000  \n",
+       "3                 12549   21708.0      1.729859  \n",
+       "4                 12549    3584.0      0.285600  \n",
+       "...                 ...       ...           ...  \n",
+       "11469             35004  128603.0      3.673952  \n",
+       "11470             35004   78034.0      2.229288  \n",
+       "11471             35004    1869.0      0.053394  \n",
+       "11472             35004       0.0      0.000000  \n",
+       "11473             35004       6.0      0.000171  \n",
+       "\n",
+       "[11474 rows x 8 columns]"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "final_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "bea15640",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/tmp/ipykernel_55575/1282086341.py:3: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
+      "  df_sum = df_fil.groupby(['State_Name','Year'],as_index = False).sum().sort_values(\"total_by_pop\",ascending=False)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "{'top3': ['MEGHALAYA', 'MEGHALAYA', 'MEGHALAYA'],\n",
+       " 'bottom3': ['DELHI', 'DELHI', 'DELHI']}"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "def state_total_schools_by_rural(main_df,year,file_name):\n",
+    "    \"\"\"Rank states by rural schools per population for one year and save the ranking as a bar chart.\n",
+    "\n",
+    "    Args:\n",
+    "        main_df: frame with Main_Dimension, Year, State_Name and total_by_pop columns.\n",
+    "        year: value matched against the Year column.\n",
+    "        file_name: target passed to plt.savefig().\n",
+    "\n",
+    "    Returns:\n",
+    "        dict with 'top3' (largest summed total_by_pop first) and 'bottom3'\n",
+    "        (smallest first); a list is shorter than three when fewer states match.\n",
+    "        The module-level dict_summary is updated with the same lists.\n",
+    "    \"\"\"\n",
+    "    df_fil=main_df.loc[(main_df['Main_Dimension'] == \"Rural Schools\")]\n",
+    "    # Restrict to the requested year; without this every year was ranked together and\n",
+    "    # the same state filled all three slots of the summary.\n",
+    "    df_fil=df_fil.loc[df_fil['Year'] == year]\n",
+    "    # numeric_only=True keeps the text columns out of the sum (the pandas default is changing).\n",
+    "    df_sum = df_fil.groupby(['State_Name','Year'],as_index = False).sum(numeric_only=True).sort_values(\"total_by_pop\",ascending=False)\n",
+    "\n",
+    "    fig = plt.figure(figsize=(fig_size_w,fig_size_h))\n",
+    "    plt.xticks(rotation=90, ha=\"right\")\n",
+    "    plt.bar(df_sum['State_Name'],df_sum['total_by_pop'], align='center')\n",
+    "    plt.ylabel('Schools/population')\n",
+    "    list_states=list(df_sum['State_Name'])\n",
+    "    # Slices instead of fixed indices: no IndexError when fewer than three states match.\n",
+    "    dict_summary['top3']=list_states[:3]\n",
+    "    dict_summary['bottom3']=list_states[::-1][:3]\n",
+    "    plt.savefig(file_name)\n",
+    "    plt.close(fig)\n",
+    "    # Return a copy so a later call cannot overwrite this result through the shared dict.\n",
+    "    return dict(dict_summary)\n",
+    "\n",
+    "\n",
+    "state_total_schools_by_rural(final_df,2014,\"foo\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "ea502ae3",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/tmp/ipykernel_55575/2771868427.py:4: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
+      "  df_sum = df_fil.groupby(['State_Name','Year'],as_index = False).sum().sort_values(\"total_by_pop\",ascending=False)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "{'top3': ['SIKKIM', 'MIZORAM', 'ARUNACHAL PRADESH'],\n",
+       " 'bottom3': ['WEST BENGAL', 'ANDHRA PRADESH', 'BIHAR']}"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "\n",
+    "def state_total_teachers(main_df,year,file_name):\n",
+    "    \"\"\"Rank states by teachers per population for one year and save the ranking as a bar chart.\n",
+    "\n",
+    "    Args:\n",
+    "        main_df: frame with Main_Dimension, Year, State_Name and total_by_pop columns.\n",
+    "        year: value matched against the Year column.\n",
+    "        file_name: target passed to plt.savefig().\n",
+    "\n",
+    "    Returns:\n",
+    "        dict with 'top3' (largest summed total_by_pop first) and 'bottom3'\n",
+    "        (smallest first); a list is shorter than three when fewer states match.\n",
+    "        The module-level dict_summary is updated with the same lists.\n",
+    "    \"\"\"\n",
+    "    df_fil=main_df.loc[(main_df['Main_Dimension'] == \"Teachers by School Category\") & (main_df['Year'] == year)]\n",
+    "    # numeric_only=True keeps the text columns out of the sum (the pandas default is changing).\n",
+    "    df_sum = df_fil.groupby(['State_Name','Year'],as_index = False).sum(numeric_only=True).sort_values(\"total_by_pop\",ascending=False)\n",
+    "\n",
+    "    fig = plt.figure(figsize=(fig_size_w,fig_size_h))\n",
+    "    plt.xticks(rotation=90, ha=\"right\")\n",
+    "    plt.bar(df_sum['State_Name'],df_sum['total_by_pop'], align='center')\n",
+    "    plt.ylabel('Teachers/population')\n",
+    "    list_states=list(df_sum['State_Name'])\n",
+    "    # Slices instead of fixed indices: no IndexError when fewer than three states match.\n",
+    "    dict_summary['top3']=list_states[:3]\n",
+    "    dict_summary['bottom3']=list_states[::-1][:3]\n",
+    "    plt.savefig(file_name)\n",
+    "    plt.close(fig)\n",
+    "    # Return a copy so a later call cannot overwrite this result through the shared dict.\n",
+    "    return dict(dict_summary)\n",
+    "\n",
+    "\n",
+    "state_total_teachers(final_df,2016,\"foo\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "981d83de",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/tmp/ipykernel_55575/2603743918.py:4: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
+      "  df_sum = df_fil.groupby(['State_Name','Year'],as_index = False).sum().sort_values(\"total_by_pop\",ascending=False)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "{'top3': ['MEGHALAYA', 'MIZORAM', 'SIKKIM'],\n",
+       " 'bottom3': ['LAKSHADWEEP', 'WEST BENGAL', 'BIHAR']}"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "\n",
+    "def state_total_teachers_by_cat(main_df,year,type_school,file_name):\n",
+    "    \"\"\"Rank states by teachers per population for one school category and year; save a bar chart.\n",
+    "\n",
+    "    Args:\n",
+    "        main_df: frame with Main_Dimension, Sub_Dimension, Year, State_Name and total_by_pop columns.\n",
+    "        year: value matched against the Year column.\n",
+    "        type_school: value matched against Sub_Dimension within \"Teachers by School Category\".\n",
+    "        file_name: target passed to plt.savefig().\n",
+    "\n",
+    "    Returns:\n",
+    "        dict with 'top3' (largest summed total_by_pop first) and 'bottom3'\n",
+    "        (smallest first); a list is shorter than three when fewer states match.\n",
+    "        The module-level dict_summary is updated with the same lists.\n",
+    "    \"\"\"\n",
+    "    df_fil=main_df.loc[(main_df['Main_Dimension'] == \"Teachers by School Category\") & (main_df['Sub_Dimension'] == type_school) & (main_df['Year'] == year)]\n",
+    "    # numeric_only=True keeps the text columns out of the sum (the pandas default is changing).\n",
+    "    df_sum = df_fil.groupby(['State_Name','Year'],as_index = False).sum(numeric_only=True).sort_values(\"total_by_pop\",ascending=False)\n",
+    "    fig = plt.figure(figsize=(fig_size_w,fig_size_h))\n",
+    "    plt.xticks(rotation=90, ha=\"right\")\n",
+    "    plt.bar(df_sum['State_Name'],df_sum['total_by_pop'], align='center')\n",
+    "    plt.ylabel('Teachers/population')\n",
+    "    list_states=list(df_sum['State_Name'])\n",
+    "    # Slices instead of fixed indices: no IndexError when fewer than three states match.\n",
+    "    dict_summary['top3']=list_states[:3]\n",
+    "    dict_summary['bottom3']=list_states[::-1][:3]\n",
+    "    plt.savefig(file_name)\n",
+    "    plt.close(fig)\n",
+    "    # Return a copy so a later call cannot overwrite this result through the shared dict.\n",
+    "    return dict(dict_summary)\n",
+    "\n",
+    "\n",
+    "state_total_teachers_by_cat(final_df,2014,\"Private\",\"foo\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "3e87176b",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/tmp/ipykernel_55575/1555212325.py:5: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
+      "  df_sum = df_fil.groupby(['State_Name','Year'],as_index = False).sum().sort_values(\"total_by_pop\",ascending=False)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "{'top3': ['SIKKIM', 'NAGALAND', 'MANIPUR'],\n",
+       " 'bottom3': ['WEST BENGAL', 'KARNATAKA', 'BIHAR']}"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "\n",
+    "def state_total_teachers_by_qual(main_df,year,qual_type,file_name):\n",
+    "    \"\"\"Rank states by teachers per population for one qualification and year; save a bar chart.\n",
+    "\n",
+    "    Args:\n",
+    "        main_df: frame with Main_Dimension, Sub_Dimension, Year, State_Name and total_by_pop columns.\n",
+    "        year: value matched against the Year column.\n",
+    "        qual_type: value matched against Sub_Dimension within \"Teachers by Educational Qualification\".\n",
+    "        file_name: target passed to plt.savefig().\n",
+    "\n",
+    "    Returns:\n",
+    "        dict with 'top3' (largest summed total_by_pop first) and 'bottom3'\n",
+    "        (smallest first); a list is shorter than three when fewer states match.\n",
+    "        The module-level dict_summary is updated with the same lists.\n",
+    "    \"\"\"\n",
+    "    # All three masks are built from main_df itself, so no mask from one frame is\n",
+    "    # applied to a differently indexed frame.\n",
+    "    df_fil=main_df.loc[(main_df['Main_Dimension'] == \"Teachers by Educational Qualification\") & (main_df['Sub_Dimension'] == qual_type) & (main_df['Year'] == year)]\n",
+    "    # numeric_only=True keeps the text columns out of the sum (the pandas default is changing).\n",
+    "    df_sum = df_fil.groupby(['State_Name','Year'],as_index = False).sum(numeric_only=True).sort_values(\"total_by_pop\",ascending=False)\n",
+    "\n",
+    "    fig = plt.figure(figsize=(fig_size_w,fig_size_h))\n",
+    "    plt.xticks(rotation=90, ha=\"right\")\n",
+    "    plt.bar(df_sum['State_Name'],df_sum['total_by_pop'], align='center')\n",
+    "    plt.ylabel('Teachers/population')\n",
+    "    list_states=list(df_sum['State_Name'])\n",
+    "    # Slices instead of fixed indices: no IndexError when fewer than three states match.\n",
+    "    dict_summary['top3']=list_states[:3]\n",
+    "    dict_summary['bottom3']=list_states[::-1][:3]\n",
+    "    plt.savefig(file_name)\n",
+    "    plt.close(fig)\n",
+    "    # Return a copy so a later call cannot overwrite this result through the shared dict.\n",
+    "    return dict(dict_summary)\n",
+    "\n",
+    "state_total_teachers_by_qual(final_df,2014,\"Graduate\",\"foo\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "6baccf51",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/tmp/ipykernel_55575/3696739622.py:4: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
+      "  df_sum = df_fil.groupby(['State_Name','Year'],as_index = False).sum().sort_values(\"total_by_pop\",ascending=False)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "{'top3': ['MIZORAM', 'ARUNACHAL PRADESH', 'MEGHALAYA'],\n",
+       " 'bottom3': ['WEST BENGAL', 'CHANDIGARH', 'DELHI']}"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "\n",
+    "def state_school_fac(main_df,year,facility,file_name):\n",
+    "    \"\"\"Rank states by schools per population having one facility in one year; save a bar chart.\n",
+    "\n",
+    "    Args:\n",
+    "        main_df: frame with Main_Dimension, Sub_Dimension, Year, State_Name and total_by_pop columns.\n",
+    "        year: value matched against the Year column.\n",
+    "        facility: value matched against Sub_Dimension within \"School Facilities\".\n",
+    "        file_name: target passed to plt.savefig().\n",
+    "\n",
+    "    Returns:\n",
+    "        dict with 'top3' (largest summed total_by_pop first) and 'bottom3'\n",
+    "        (smallest first); a list is shorter than three when fewer states match.\n",
+    "        The module-level dict_summary is updated with the same lists.\n",
+    "    \"\"\"\n",
+    "    df_fil=main_df.loc[(main_df['Main_Dimension'] == \"School Facilities\") & (main_df['Sub_Dimension'] == facility) & (main_df['Year'] == year)]\n",
+    "    # numeric_only=True keeps the text columns out of the sum (the pandas default is changing).\n",
+    "    df_sum = df_fil.groupby(['State_Name','Year'],as_index = False).sum(numeric_only=True).sort_values(\"total_by_pop\",ascending=False)\n",
+    "\n",
+    "    fig = plt.figure(figsize=(fig_size_w,fig_size_h))\n",
+    "    plt.xticks(rotation=90, ha=\"right\")\n",
+    "    plt.bar(df_sum['State_Name'],df_sum['total_by_pop'], align='center')\n",
+    "    plt.ylabel('Schools/population')\n",
+    "    list_states=list(df_sum['State_Name'])\n",
+    "    # Slices instead of fixed indices: no IndexError when fewer than three states match.\n",
+    "    dict_summary['top3']=list_states[:3]\n",
+    "    dict_summary['bottom3']=list_states[::-1][:3]\n",
+    "    plt.savefig(file_name)\n",
+    "    plt.close(fig)\n",
+    "    # Return a copy so a later call cannot overwrite this result through the shared dict.\n",
+    "    return dict(dict_summary)\n",
+    "\n",
+    "\n",
+    "state_school_fac(final_df,2014,\"Schools with Girls' Toilet\",\"foo\")\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "f42ca283",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/tmp/ipykernel_55575/3535025734.py:5: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
+      "  df_sum = df_fil.groupby(['State_Name','Year'],as_index = False).sum().sort_values(\"total_by_pop\",ascending=False)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "{'top3': ['SIKKIM', 'HIMACHAL PRADESH', 'A&N ISLANDS'],\n",
+       " 'bottom3': ['WEST BENGAL', 'BIHAR', 'MANIPUR']}"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "\n",
+    "def state_total_class(main_df,year,cond,file_name):\n",
+    "    \"\"\"Rank states by classrooms per population for one condition and year; save a bar chart.\n",
+    "\n",
+    "    Args:\n",
+    "        main_df: frame with Main_Dimension, Sub_Dimension, Year, State_Name and total_by_pop columns.\n",
+    "        year: value matched against the Year column.\n",
+    "        cond: value matched against Sub_Dimension within \"Total Classrooms\".\n",
+    "        file_name: target passed to plt.savefig().\n",
+    "\n",
+    "    Returns:\n",
+    "        dict with 'top3' (largest summed total_by_pop first) and 'bottom3'\n",
+    "        (smallest first); a list is shorter than three when fewer states match.\n",
+    "        The module-level dict_summary is updated with the same lists.\n",
+    "    \"\"\"\n",
+    "    df_fil=main_df.loc[(main_df['Main_Dimension'] == \"Total Classrooms\") & (main_df['Sub_Dimension'] == cond) & (main_df['Year'] == year)]\n",
+    "    # numeric_only=True keeps the text columns out of the sum (the pandas default is changing).\n",
+    "    df_sum = df_fil.groupby(['State_Name','Year'],as_index = False).sum(numeric_only=True).sort_values(\"total_by_pop\",ascending=False)\n",
+    "\n",
+    "    fig = plt.figure(figsize=(fig_size_w,fig_size_h))\n",
+    "    plt.xticks(rotation=90, ha=\"right\")\n",
+    "    plt.bar(df_sum['State_Name'],df_sum['total_by_pop'], align='center')\n",
+    "    plt.ylabel('Classrooms/population')\n",
+    "    list_states=list(df_sum['State_Name'])\n",
+    "    # Slices instead of fixed indices: no IndexError when fewer than three states match.\n",
+    "    dict_summary['top3']=list_states[:3]\n",
+    "    dict_summary['bottom3']=list_states[::-1][:3]\n",
+    "    plt.savefig(file_name)\n",
+    "    plt.close(fig)\n",
+    "    # Return a copy so a later call cannot overwrite this result through the shared dict.\n",
+    "    return dict(dict_summary)\n",
+    "\n",
+    "\n",
+    "state_total_class(final_df,2010,\"Good Condition\",\"foo\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "90789f2c",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'top3': ['ARUNACHAL PRADESH', 'MEGHALAYA', 'MIZORAM'],\n",
+       " 'bottom3': ['WEST BENGAL', 'KERALA', 'DAMAN & DIU']}"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "\n",
+    "def state_total_enrollments(main_df,year,file_name):\n",
+    "    \"\"\"Rank states by elementary enrolment per population for one year; save a bar chart.\n",
+    "\n",
+    "    Args:\n",
+    "        main_df: frame with Main_Dimension, Year, State_Name, total and total_by_pop columns.\n",
+    "        year: value matched against the Year column.\n",
+    "        file_name: target passed to plt.savefig().\n",
+    "\n",
+    "    Returns:\n",
+    "        dict with 'top3' (largest summed total_by_pop first) and 'bottom3'\n",
+    "        (smallest first); a list is shorter than three when fewer states match.\n",
+    "        The module-level dict_summary is updated with the same lists.\n",
+    "    \"\"\"\n",
+    "    df_fil=main_df.loc[(main_df['Main_Dimension'] == \"Elementary Enrolment by School Category\") & (main_df['Year'] == year)]\n",
+    "    # Only the grouping keys and the two numeric measures are kept before summing.\n",
+    "    df_fil=df_fil[['Year','State_Name','total','total_by_pop']]\n",
+    "    df_sum = df_fil.groupby(['State_Name','Year'],as_index = False).sum().sort_values(\"total_by_pop\",ascending=False)\n",
+    "    fig = plt.figure(figsize=(fig_size_w,fig_size_h))\n",
+    "    plt.xticks(rotation=90, ha=\"right\")\n",
+    "    plt.bar(df_sum['State_Name'],df_sum['total_by_pop'], align='center')\n",
+    "    plt.ylabel('Enrollements/population')\n",
+    "    list_states=list(df_sum['State_Name'])\n",
+    "    # Slices instead of fixed indices: no IndexError when fewer than three states match.\n",
+    "    dict_summary['top3']=list_states[:3]\n",
+    "    dict_summary['bottom3']=list_states[::-1][:3]\n",
+    "    plt.savefig(file_name)\n",
+    "    plt.close(fig)\n",
+    "    # Return a copy so a later call cannot overwrite this result through the shared dict.\n",
+    "    return dict(dict_summary)\n",
+    "\n",
+    "#     plt.savefig(\"images/\"+filename+\".png\");\n",
+    "#     plt.close();\n",
+    "#     plt.close(fig);\n",
+    "\n",
+    "state_total_enrollments(final_df,2010,\"enrol\")\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "06025324",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'top3': ['NAGALAND', 'MEGHALAYA', 'MANIPUR'],\n",
+       " 'bottom3': ['LAKSHADWEEP', 'BIHAR', 'WEST BENGAL']}"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "\n",
+    "def state_total_enrollments_by_cat(main_df,year,cat_type,file_name):\n",
+    "    \"\"\"Rank states by elementary enrolment per population for one school category and year.\n",
+    "\n",
+    "    The ranking is also saved as a bar chart.\n",
+    "\n",
+    "    Args:\n",
+    "        main_df: frame with Main_Dimension, Sub_Dimension, Year, State_Name, total and total_by_pop columns.\n",
+    "        year: value matched against the Year column.\n",
+    "        cat_type: value matched against Sub_Dimension within \"Elementary Enrolment by School Category\".\n",
+    "        file_name: target passed to plt.savefig().\n",
+    "\n",
+    "    Returns:\n",
+    "        dict with 'top3' (largest summed total_by_pop first) and 'bottom3'\n",
+    "        (smallest first); a list is shorter than three when fewer states match.\n",
+    "        The module-level dict_summary is updated with the same lists.\n",
+    "    \"\"\"\n",
+    "    df_fil=main_df.loc[(main_df['Main_Dimension'] == \"Elementary Enrolment by School Category\") & (main_df['Sub_Dimension'] == cat_type) & (main_df['Year'] == year)]\n",
+    "    # Only the grouping keys and the two numeric measures are kept before summing.\n",
+    "    df_fil=df_fil[['Year','State_Name','total','total_by_pop']]\n",
+    "    df_sum = df_fil.groupby(['State_Name','Year'],as_index = False).sum().sort_values(\"total_by_pop\",ascending=False)\n",
+    "    fig = plt.figure(figsize=(fig_size_w,fig_size_h))\n",
+    "    plt.xticks(rotation=90, ha=\"right\")\n",
+    "    plt.bar(df_sum['State_Name'],df_sum['total_by_pop'], align='center')\n",
+    "    plt.ylabel('Enrollements/Population')\n",
+    "    list_states=list(df_sum['State_Name'])\n",
+    "    # Slices instead of fixed indices: no IndexError when fewer than three states match.\n",
+    "    dict_summary['top3']=list_states[:3]\n",
+    "    dict_summary['bottom3']=list_states[::-1][:3]\n",
+    "    plt.savefig(file_name)\n",
+    "    plt.close(fig)\n",
+    "    # Return a copy so a later call cannot overwrite this result through the shared dict.\n",
+    "    return dict(dict_summary)\n",
+    "\n",
+    "\n",
+    "state_total_enrollments_by_cat(final_df,2010,\"Private\",\"enrol\")\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "3e7fe81e",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'top3': ['ARUNACHAL PRADESH', 'MEGHALAYA', 'MIZORAM'],\n",
+       " 'bottom3': ['WEST BENGAL', 'KERALA', 'DAMAN & DIU']}"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "\n",
+    "def state_total_enrollments_by_rural(main_df,year,filename):\n",
+    "    \"\"\"Sum the \"Rural Schools\" rows per state for one year, save a bar chart, return the table.\n",
+    "\n",
+    "    Args:\n",
+    "        main_df: frame with Main_Dimension, Year, State_Name, total and total_by_pop columns.\n",
+    "        year: value matched against the Year column.\n",
+    "        filename: target passed to plt.savefig().\n",
+    "\n",
+    "    Returns:\n",
+    "        DataFrame with one row per (State_Name, Year) holding the summed total and\n",
+    "        total_by_pop, sorted by total_by_pop in descending order.\n",
+    "    \"\"\"\n",
+    "    df_fil=main_df.loc[(main_df['Main_Dimension'] == \"Rural Schools\")]\n",
+    "    df_fil=df_fil.loc[df_fil['Year'] == year]\n",
+    "    df_fil=df_fil[['Year','State_Name','total','total_by_pop']]\n",
+    "    df_sum = df_fil.groupby(['State_Name','Year'],as_index = False).sum().sort_values(\"total_by_pop\",ascending=False)\n",
+    "    fig = plt.figure(figsize=(fig_size_w,fig_size_h))\n",
+    "    plt.xticks(rotation=90, ha=\"right\")\n",
+    "    plt.bar(df_sum['State_Name'],df_sum['total_by_pop'], align='center')\n",
+    "    plt.ylabel('Enrollements/population')\n",
+    "    # The parameter is named filename; the body previously read an undefined file_name.\n",
+    "    plt.savefig(filename)\n",
+    "    plt.close(fig)\n",
+    "\n",
+    "    return df_sum\n",
+    "\n",
+    "state_total_enrollments_by_rural(final_df,2010,\"enrol\")\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "aa04749f",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cb78eff7",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "543f8bce",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "79058fce",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python (edu)",
+   "language": "python",
+   "name": "edu"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}