import pandas as pd
import numpy as np
import os
import glob
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

# Ranking holder written (and returned) by statewise_distribution: two lists
# of state names stored under the keys "top3" and "bottom3".
dict_summary = dict(top3=[], bottom3=[])

# (width, height) passed as ``figsize`` for the state-wise bar chart.
fig_size_w = 8
fig_size_h = 6

# (width, height) passed as ``figsize`` for the year-trend line charts.
fig_size_tr_w = 8
fig_size_tr_h = 6

def read_all_csv(path='./rawdata_csv/', extension='csv', master_file='SRC_MasterData.csv'):
    """Load every ``SRC_R*`` report file and join it with the state master data.

    All files matching ``SRC_R*.<extension>`` inside ``path`` are read and
    stacked into one frame (in sorted file-name order, so the row order is
    reproducible). The result is inner-joined with the master file on
    ``State_Code``; report rows whose state code is absent from the master
    file are therefore dropped.

    Two columns are derived:

    * ``total`` -- row-wise sum of the seven school-category columns.
    * ``total_by_population`` -- ``total`` divided by ``Total_Population``.

    Args:
        path: Directory holding the report files. A trailing path separator
            is optional.
        extension: File extension of the report files, without the dot.
        master_file: Path of the master CSV; it must provide the columns
            ``State_Code``, ``State_Name`` and ``Total_Population``.

    Returns:
        DataFrame with the columns ``Year``, ``State_Name``, ``State_Code``,
        ``Main_Dimension``, ``Sub_Dimension``, ``Total_Population``,
        ``total`` and ``total_by_population``.

    Raises:
        FileNotFoundError: If no report file matches the pattern.
    """
    pattern = os.path.join(path, 'SRC_R*.{}'.format(extension))
    # glob returns files in arbitrary order; sort for deterministic output.
    report_files = sorted(glob.glob(pattern))
    if not report_files:
        raise FileNotFoundError(
            "No report files match '{}'".format(pattern))

    # Concatenate once instead of re-copying the growing frame per file.
    df = pd.concat((pd.read_csv(name) for name in report_files),
                   ignore_index=True)

    df_master = pd.read_csv(master_file)
    df_master = df_master[['State_Code', 'State_Name', 'Total_Population']]
    final_df = pd.merge(df, df_master, on='State_Code', how='inner')

    # NOTE: the third name really contains a space ("Primary with_...");
    # it has to match the CSV header exactly.
    category_columns = [
        'Primary_Only',
        'Primary_with_Upper_Primary',
        'Primary with_Upper_Primary_Sec_H.Sec',
        'Upper_Primary_Only',
        'Upper_Primary_with_Sec_H.Sec',
        'Primary_with_Upper_Primary_Sec',
        'Upper_Primary_with_Sec',
    ]
    final_df['total'] = final_df[category_columns].sum(axis=1)
    final_df['total_by_population'] = final_df['total'] / final_df['Total_Population']

    return final_df[["Year", "State_Name", "State_Code", "Main_Dimension",
                     "Sub_Dimension", "Total_Population", 'total',
                     'total_by_population']]

def statewise_distribution(output_filename, input_df, year, main_dimension, sub_dimension=''):
    """Save a bar chart of per-state totals relative to population for one year.

    Rows of ``input_df`` are restricted to ``main_dimension`` and ``year``
    (and to ``sub_dimension`` when it is non-empty), summed per state and
    ordered by ``total_by_population`` from highest to lowest before being
    drawn as bars.

    Args:
        output_filename: Destination handed to ``plt.savefig``.
        input_df: Frame with the columns ``Year``, ``State_Name``,
            ``Main_Dimension``, ``Sub_Dimension``, ``total`` and
            ``total_by_population``.
        year: Value matched against the ``Year`` column.
        main_dimension: Value matched against ``Main_Dimension``.
        sub_dimension: Optional value matched against ``Sub_Dimension``;
            an empty string disables this filter.

    Returns:
        A new dict with the keys ``'top3'`` (state names with the highest
        ratio, best first) and ``'bottom3'`` (lowest ratio, worst first).
        Each list is shorter than three entries when fewer states match.
        The module-level ``dict_summary`` is updated with the same values.
    """
    mask = (input_df['Main_Dimension'] == main_dimension) & (input_df['Year'] == year)
    if sub_dimension:
        mask = mask & (input_df['Sub_Dimension'] == sub_dimension)
    df_filtered = input_df.loc[mask, ['Year', 'State_Name', 'total', 'total_by_population']]
    df_sum = (
        df_filtered
        .groupby(['State_Name', 'Year'], as_index=False)
        .sum()
        .sort_values("total_by_population", ascending=False)
    )

    # Slicing (rather than indexing 0/1/2 and -1/-2/-3) keeps this working
    # when fewer than three states are present.
    list_states = list(df_sum['State_Name'])
    summary = {'top3': list_states[:3], 'bottom3': list_states[::-1][:3]}

    # Keep the module-level dict in sync for code that reads it directly, but
    # hand the caller independent lists so later calls cannot overwrite an
    # earlier result.
    dict_summary['top3'] = list(summary['top3'])
    dict_summary['bottom3'] = list(summary['bottom3'])

    fig = plt.figure(figsize=(fig_size_w, fig_size_h))
    try:
        plt.xticks(rotation=90, ha="right")
        plt.bar(df_sum['State_Name'], df_sum['total_by_population'], align='center')
        # NOTE(review): the label text is misspelled; kept as-is so the
        # generated charts do not change.
        plt.ylabel('Enrollements/Population')
        plt.savefig(output_filename)
    finally:
        # Always release the figure, even if drawing or saving fails.
        plt.close(fig)

    return summary

def total_enrolment_by_category(output_filename, input_df, main_dimension, state_code, ylabel):
    """Save a per-year line chart of totals by school category for one state.

    The "Government" and "Private" sub-dimensions are always drawn;
    "Madrasas & Unrecognised" is added unless ``main_dimension`` contains
    the text ``'Rural'``.

    Args:
        output_filename: Destination handed to ``plt.savefig``.
        input_df: Frame with the columns ``Main_Dimension``,
            ``Sub_Dimension``, ``State_Code``, ``Year`` and ``total``.
        main_dimension: Value matched against ``Main_Dimension``.
        state_code: Value matched against ``State_Code``.
        ylabel: Label for the y axis.

    Raises:
        ValueError: If no row matches ``main_dimension`` and ``state_code``.
    """
    selected = input_df.loc[
        (input_df['Main_Dimension'] == main_dimension)
        & (input_df['State_Code'] == state_code)
    ]
    # Validate before a figure exists so a failed call cannot leak one.
    if selected.empty:
        raise ValueError(
            "no rows for Main_Dimension={!r} and State_Code={!r}".format(
                main_dimension, state_code))
    min_year, max_year = selected['Year'].min(), selected['Year'].max()

    # Aggregate only 'total'; summing the text columns is wasted work and
    # behaves differently across pandas versions.
    df_sum = selected.groupby(
        ['Sub_Dimension', 'State_Code', 'Year'], as_index=False)['total'].sum()

    # (Sub_Dimension value, line colour, legend label)
    # NOTE(review): 'Goverment' is misspelled; kept as-is so the generated
    # charts do not change.
    series_styles = [
        ("Government", 'r', 'Goverment'),
        ("Private", 'g', 'Private'),
    ]
    if 'Rural' not in main_dimension:
        series_styles.append(("Madrasas & Unrecognised", 'b', 'Madrasas'))

    fig = plt.figure(figsize=(fig_size_tr_w, fig_size_tr_h))
    try:
        for sub_dimension, color, label in series_styles:
            rows = df_sum.loc[df_sum['Sub_Dimension'] == sub_dimension]
            plt.plot(rows['Year'], rows['total'], color=color, label=label)

        plt.legend()
        # One tick per year across the observed range.
        plt.xticks(np.arange(min_year, max_year+1, 1.0))
        plt.ylabel(ylabel)
        plt.xlabel('Year')
        plt.savefig(output_filename)
    finally:
        plt.close(fig)

def total_teachers_by_main_dimension(output_filename, input_df, main_dimension, state_code):
    """Save a per-year line chart of teacher totals for one state.

    When ``main_dimension`` contains the text ``'Category'`` the lines are
    the school categories (Government, Private, Madrasas & Unrecognised);
    otherwise they are the six qualification levels from "Below Secondary"
    to "M.Phil / Ph.D".

    Args:
        output_filename: Destination handed to ``plt.savefig``.
        input_df: Frame with the columns ``Main_Dimension``,
            ``Sub_Dimension``, ``State_Code``, ``Year`` and ``total``.
        main_dimension: Value matched against ``Main_Dimension``.
        state_code: Value matched against ``State_Code``.

    Raises:
        ValueError: If no row matches ``main_dimension`` and ``state_code``.
    """
    selected = input_df.loc[
        (input_df['Main_Dimension'] == main_dimension)
        & (input_df['State_Code'] == state_code)
    ]
    # Validate before a figure exists so a failed call cannot leak one.
    if selected.empty:
        raise ValueError(
            "no rows for Main_Dimension={!r} and State_Code={!r}".format(
                main_dimension, state_code))
    min_year, max_year = selected['Year'].min(), selected['Year'].max()

    # Aggregate only 'total'; summing the text columns is wasted work and
    # behaves differently across pandas versions.
    df_sum = selected.groupby(
        ['Sub_Dimension', 'State_Code', 'Year'], as_index=False)['total'].sum()

    # (Sub_Dimension value, line colour, legend label)
    if 'Category' in main_dimension:
        # NOTE(review): 'Goverment' is misspelled; kept as-is so the
        # generated charts do not change.
        series_styles = (
            ("Government", 'r', 'Goverment'),
            ("Private", 'g', 'Private'),
            ("Madrasas & Unrecognised", 'b', 'Madrasas'),
        )
    else:
        series_styles = (
            ("Below Secondary", 'r', 'Below Secondary'),
            ("Secondary", 'g', 'Secondary'),
            ("Higher Secondary", 'b', 'Higher Secondary'),
            ("Graduate", 'y', 'Graduate'),
            ("Post Graduate", 'c', 'Post Graduate'),
            ("M.Phil / Ph.D", 'm', 'M.Phil / Ph.D'),
        )

    fig = plt.figure(figsize=(fig_size_tr_w, fig_size_tr_h))
    try:
        for sub_dimension, color, label in series_styles:
            rows = df_sum.loc[df_sum['Sub_Dimension'] == sub_dimension]
            plt.plot(rows['Year'], rows['total'], color=color, label=label)

        plt.legend()
        # One tick per year across the observed range.
        plt.xticks(np.arange(min_year, max_year+1, 1.0))
        plt.ylabel('Number of Teachers')
        plt.xlabel('Year')
        plt.savefig(output_filename)
    finally:
        plt.close(fig)

def total_classrooms_by_trend(output_filename, input_df, state_code):
    """Save a per-year line chart of classroom counts by condition for one state.

    Uses the rows whose ``Main_Dimension`` is "Total Classrooms" and draws
    one line each for the sub-dimensions "Good Condition",
    "Need Minor Repair" and "Need Major Repair".

    Args:
        output_filename: Destination handed to ``plt.savefig``.
        input_df: Frame with the columns ``Main_Dimension``,
            ``Sub_Dimension``, ``State_Code``, ``Year`` and ``total``.
        state_code: Value matched against ``State_Code``.

    Raises:
        ValueError: If the state has no "Total Classrooms" rows.
    """
    selected = input_df.loc[
        (input_df['Main_Dimension'] == "Total Classrooms")
        & (input_df['State_Code'] == state_code)
    ]
    # Validate before a figure exists so a failed call cannot leak one.
    if selected.empty:
        raise ValueError(
            "no rows for Main_Dimension='Total Classrooms' and "
            "State_Code={!r}".format(state_code))
    min_year, max_year = selected['Year'].min(), selected['Year'].max()

    # Aggregate only 'total'; summing the text columns is wasted work and
    # behaves differently across pandas versions.
    df_sum = selected.groupby(
        ['Sub_Dimension', 'State_Code', 'Year'], as_index=False)['total'].sum()

    # (Sub_Dimension value, line colour); the legend label equals the value.
    series_styles = (
        ("Good Condition", 'r'),
        ("Need Minor Repair", 'g'),
        ("Need Major Repair", 'b'),
    )

    fig = plt.figure(figsize=(fig_size_tr_w, fig_size_tr_h))
    try:
        for sub_dimension, color in series_styles:
            rows = df_sum.loc[df_sum['Sub_Dimension'] == sub_dimension]
            plt.plot(rows['Year'], rows['total'], color=color, label=sub_dimension)

        plt.legend()
        # One tick per year across the observed range.
        plt.xticks(np.arange(min_year, max_year+1, 1.0))
        plt.ylabel('Number of classrooms')
        plt.xlabel('Year')
        plt.savefig(output_filename)
    finally:
        plt.close(fig)

def total_schools_for_facilities(output_filename, input_df, state_code):
    """Save a per-year line chart of school counts by facility for one state.

    Uses the rows whose ``Main_Dimension`` is "School Facilities" and draws
    one line per facility sub-dimension (enrolment <= 50, drinking water,
    girls' toilet, ramp, single classroom, single teacher).

    Args:
        output_filename: Destination handed to ``plt.savefig``.
        input_df: Frame with the columns ``Main_Dimension``,
            ``Sub_Dimension``, ``State_Code``, ``Year`` and ``total``.
        state_code: Value matched against ``State_Code``.

    Raises:
        ValueError: If the state has no "School Facilities" rows.
    """
    selected = input_df.loc[
        (input_df['Main_Dimension'] == "School Facilities")
        & (input_df['State_Code'] == state_code)
    ]
    # Validate before a figure exists so a failed call cannot leak one.
    if selected.empty:
        raise ValueError(
            "no rows for Main_Dimension='School Facilities' and "
            "State_Code={!r}".format(state_code))
    min_year, max_year = selected['Year'].min(), selected['Year'].max()

    # Aggregate only 'total'; summing the text columns is wasted work and
    # behaves differently across pandas versions.
    df_sum = selected.groupby(
        ['Sub_Dimension', 'State_Code', 'Year'], as_index=False)['total'].sum()

    # (Sub_Dimension value, line colour); the legend label equals the value.
    series_styles = (
        ("Enrolment <= 50", 'r'),
        ("Schools with Drinking Water", 'g'),
        ("Schools with Girls' Toilet", 'b'),
        ("Schools with Ramp", 'y'),
        ("Single Classroom Schools", 'c'),
        ("Single Teacher Schools", 'm'),
    )

    fig = plt.figure(figsize=(fig_size_tr_w, fig_size_tr_h))
    try:
        for sub_dimension, color in series_styles:
            rows = df_sum.loc[df_sum['Sub_Dimension'] == sub_dimension]
            plt.plot(rows['Year'], rows['total'], color=color, label=sub_dimension)

        plt.legend()
        # One tick per year across the observed range.
        plt.xticks(np.arange(min_year, max_year+1, 1.0))
        plt.ylabel('Number of schools')
        plt.xlabel('Year')
        plt.savefig(output_filename)
    finally:
        plt.close(fig)

def main_dimension_overall(output_filename, input_df, main_dimension, ylabel):
    """Save a per-year line chart of totals across all states for one dimension.

    When ``main_dimension`` contains the text ``'Classrooms'`` the lines are
    the classroom conditions (Good Condition, Need Minor Repair, Need Major
    Repair); otherwise they are the school categories (Government, Private,
    Madrasas & Unrecognised).

    Args:
        output_filename: Destination handed to ``plt.savefig``.
        input_df: Frame with the columns ``Main_Dimension``,
            ``Sub_Dimension``, ``Year`` and ``total``.
        main_dimension: Value matched against ``Main_Dimension``.
        ylabel: Label for the y axis.

    Raises:
        ValueError: If no row matches ``main_dimension``.
    """
    selected = input_df.loc[input_df['Main_Dimension'] == main_dimension]
    # Validate before a figure exists so a failed call cannot leak one.
    if selected.empty:
        raise ValueError(
            "no rows for Main_Dimension={!r}".format(main_dimension))
    min_year, max_year = selected['Year'].min(), selected['Year'].max()

    # Aggregate only 'total'; a whole-frame sum would also add up state
    # codes and text columns, which is wasted work and behaves differently
    # across pandas versions.
    df_sum = selected.groupby(
        ['Sub_Dimension', 'Year'], as_index=False)['total'].sum()

    # (Sub_Dimension value, line colour, legend label)
    if 'Classrooms' not in main_dimension:
        # NOTE(review): 'Goverment' is misspelled; kept as-is so the
        # generated charts do not change.
        series_styles = (
            ("Government", 'r', 'Goverment'),
            ("Private", 'g', 'Private'),
            ("Madrasas & Unrecognised", 'b', 'Madrasas'),
        )
    else:
        series_styles = (
            ("Good Condition", 'r', 'Good Condition'),
            ("Need Minor Repair", 'g', 'Need Minor Repair'),
            ("Need Major Repair", 'b', 'Need Major Repair'),
        )

    fig = plt.figure(figsize=(fig_size_tr_w, fig_size_tr_h))
    try:
        for sub_dimension, color, label in series_styles:
            rows = df_sum.loc[df_sum['Sub_Dimension'] == sub_dimension]
            plt.plot(rows['Year'], rows['total'], color=color, label=label)

        plt.legend()
        # One tick per year across the observed range.
        plt.xticks(np.arange(min_year, max_year+1, 1.0))
        plt.ylabel(ylabel)
        plt.xlabel('Year')
        plt.savefig(output_filename)
    finally:
        plt.close(fig)

def total_schools_for_facilities_overall(output_filename, input_df):
    """Save a per-year line chart of school counts by facility across all states.

    Uses the rows whose ``Main_Dimension`` is "School Facilities" and draws
    one line per facility sub-dimension (enrolment <= 50, drinking water,
    girls' toilet, ramp, single classroom, single teacher).

    Args:
        output_filename: Destination handed to ``plt.savefig``.
        input_df: Frame with the columns ``Main_Dimension``,
            ``Sub_Dimension``, ``Year`` and ``total``.

    Raises:
        ValueError: If ``input_df`` has no "School Facilities" rows.
    """
    selected = input_df.loc[input_df['Main_Dimension'] == "School Facilities"]
    # Validate before a figure exists so a failed call cannot leak one.
    if selected.empty:
        raise ValueError("no rows for Main_Dimension='School Facilities'")
    min_year, max_year = selected['Year'].min(), selected['Year'].max()

    # Aggregate only 'total'; a whole-frame sum would also add up state
    # codes and text columns, which is wasted work and behaves differently
    # across pandas versions.
    df_sum = selected.groupby(
        ['Sub_Dimension', 'Year'], as_index=False)['total'].sum()

    # (Sub_Dimension value, line colour); the legend label equals the value.
    series_styles = (
        ("Enrolment <= 50", 'r'),
        ("Schools with Drinking Water", 'g'),
        ("Schools with Girls' Toilet", 'b'),
        ("Schools with Ramp", 'y'),
        ("Single Classroom Schools", 'c'),
        ("Single Teacher Schools", 'm'),
    )

    fig = plt.figure(figsize=(fig_size_tr_w, fig_size_tr_h))
    try:
        for sub_dimension, color in series_styles:
            rows = df_sum.loc[df_sum['Sub_Dimension'] == sub_dimension]
            plt.plot(rows['Year'], rows['total'], color=color, label=sub_dimension)

        plt.legend()
        # One tick per year across the observed range.
        plt.xticks(np.arange(min_year, max_year+1, 1.0))
        plt.ylabel('Number of schools')
        plt.xlabel('Year')
        plt.savefig(output_filename)
    finally:
        plt.close(fig)