import numpy as np
import argparse
import csv
import matplotlib.pyplot as plt
''' 
You are only required to fill the following functions
mean_squared_loss
mean_squared_gradient
mean_absolute_loss
mean_absolute_gradient
mean_log_cosh_loss
mean_log_cosh_gradient
root_mean_squared_loss
root_mean_squared_gradient
preprocess_dataset
main

Don't modify any other functions or command-line arguments, because an autograder will be used.
Don't modify function declarations (arguments).

'''

def mean_squared_loss(xdata, ydata, weights):
	'''
	Half mean squared error of the linear model xdata . weights.

	Computes (1 / (2 * N)) * sum((xdata . weights - ydata) ** 2), where N is the
	number of predictions. The 1/2 factor makes the derivative with respect to
	weights exactly (1 / N) * xdata.T . (xdata . weights - ydata).

	xdata   : [N X D] feature matrix
	ydata   : N targets, either flat [N] or as a column [N X 1]
	weights : D weights
	Returns the loss as a scalar.
	'''
	# Flatten both sides so a column-vector target cannot broadcast into an
	# N x N difference matrix.
	residual = np.ravel(np.dot(xdata, weights)) - np.ravel(ydata)
	samples = residual.shape[0]
	# Divide (not multiply) by the sample count: the loss is a mean and must
	# not grow with the size of the dataset.
	return 0.5 * np.sum(np.square(residual)) / samples

def mean_squared_gradient(xdata, ydata, weights):
	'''
	Gradient, with respect to weights, of the half mean squared error
	(1 / (2 * N)) * sum((xdata . weights - ydata) ** 2).

	xdata   : [N X D] feature matrix
	ydata   : N targets, either flat [N] or as a column [N X 1]
	weights : D weights
	Returns (1 / N) * xdata.T . (xdata . weights - ydata), shaped like weights.
	'''
	# Flatten both sides so a column-vector target yields a length-N residual
	# instead of a shape that np.dot rejects.
	residual = np.ravel(np.dot(xdata, weights)) - np.ravel(ydata)
	samples = residual.shape[0]
	gradient = np.dot(np.transpose(xdata), residual) / samples
	# Hand the gradient back in the caller's weight layout so that
	# "weights - lr * gradient" never broadcasts.
	return np.reshape(gradient, np.shape(weights))

def mean_absolute_loss(xdata, ydata, weights):
	'''
	Mean absolute error of the linear model xdata . weights.

	Computes (1 / N) * sum(|xdata . weights - ydata|).

	xdata   : [N X D] feature matrix
	ydata   : N targets, either flat [N] or as a column [N X 1]
	weights : D weights
	Returns the loss as a scalar.
	'''
	# Flatten both sides so a column-vector target cannot broadcast into an
	# N x N difference matrix.
	residual = np.ravel(np.dot(xdata, weights)) - np.ravel(ydata)
	return np.mean(np.absolute(residual))

def mean_absolute_gradient(xdata, ydata, weights):
	'''
	(Sub)gradient, with respect to weights, of the mean absolute error
	(1 / N) * sum(|xdata . weights - ydata|).

	xdata   : [N X D] feature matrix
	ydata   : N targets, either flat [N] or as a column [N X 1]
	weights : D weights
	Returns (1 / N) * xdata.T . sign(xdata . weights - ydata), shaped like weights.
	'''
	residual = np.ravel(np.dot(xdata, weights)) - np.ravel(ydata)
	samples = residual.shape[0]
	# |r| is not differentiable at r == 0; np.sign returns 0 there, which is a
	# valid subgradient.
	gradient = np.dot(np.transpose(xdata), np.sign(residual)) / samples
	return np.reshape(gradient, np.shape(weights))

def mean_log_cosh_loss(xdata, ydata, weights):
	'''
	Mean log-cosh error of the linear model xdata . weights.

	Computes (1 / N) * sum(log(cosh(xdata . weights - ydata))).

	xdata   : [N X D] feature matrix
	ydata   : N targets, either flat [N] or as a column [N X 1]
	weights : D weights
	Returns the loss as a scalar.
	'''
	residual = np.ravel(np.dot(xdata, weights)) - np.ravel(ydata)
	# log(cosh(r)) == log(exp(r) + exp(-r)) - log(2). np.logaddexp evaluates the
	# first term without overflowing, whereas np.cosh overflows for large |r|.
	return np.mean(np.logaddexp(residual, -residual) - np.log(2.0))

def mean_log_cosh_gradient(xdata, ydata, weights):
	'''
	Gradient, with respect to weights, of the mean log-cosh error
	(1 / N) * sum(log(cosh(xdata . weights - ydata))).

	xdata   : [N X D] feature matrix
	ydata   : N targets, either flat [N] or as a column [N X 1]
	weights : D weights
	Returns (1 / N) * xdata.T . tanh(xdata . weights - ydata), shaped like weights.
	'''
	residual = np.ravel(np.dot(xdata, weights)) - np.ravel(ydata)
	samples = residual.shape[0]
	# d/dr log(cosh(r)) == tanh(r), which stays bounded in [-1, 1].
	gradient = np.dot(np.transpose(xdata), np.tanh(residual)) / samples
	return np.reshape(gradient, np.shape(weights))

def root_mean_squared_loss(xdata, ydata, weights):
	'''
	Root mean squared error of the linear model xdata . weights.

	Computes sqrt((1 / N) * sum((xdata . weights - ydata) ** 2)).

	xdata   : [N X D] feature matrix
	ydata   : N targets, either flat [N] or as a column [N X 1]
	weights : D weights
	Returns the loss as a scalar.
	'''
	# Flatten both sides so a column-vector target cannot broadcast into an
	# N x N difference matrix.
	residual = np.ravel(np.dot(xdata, weights)) - np.ravel(ydata)
	return np.sqrt(np.mean(np.square(residual)))

def root_mean_squared_gradient(xdata, ydata, weights):
	'''
	Gradient, with respect to weights, of the root mean squared error
	sqrt((1 / N) * sum((xdata . weights - ydata) ** 2)).

	xdata   : [N X D] feature matrix
	ydata   : N targets, either flat [N] or as a column [N X 1]
	weights : D weights
	Returns xdata.T . residual / (N * rmse), shaped like weights, where
	residual = xdata . weights - ydata. Returns zeros when rmse is 0.
	'''
	residual = np.ravel(np.dot(xdata, weights)) - np.ravel(ydata)
	samples = residual.shape[0]
	rmse = np.sqrt(np.mean(np.square(residual)))
	if rmse == 0:
		# A perfect fit is the minimum of the loss; the formula below would
		# divide by zero, so report a zero step instead.
		return np.zeros(np.shape(weights))
	gradient = np.dot(np.transpose(xdata), residual) / (samples * rmse)
	return np.reshape(gradient, np.shape(weights))

class LinearRegressor:
	'''
	Linear model (prediction = x . W) fitted by full-batch gradient descent.
	'''

	def __init__(self, dims):
		'''
		dims : number of weights, i.e. the number of columns of the feature matrix
		'''
		self.dims = dims
		# Training starts from the all-zero weight vector.
		self.W = np.zeros(dims)

	def train(self, xtrain, ytrain, loss_function, gradient_function, epoch=100, lr=1):
		'''
		Runs `epoch` gradient-descent steps on the whole training set.

		xtrain, ytrain    : training features and targets, passed through
		                    unchanged to the two callables
		loss_function     : callable (xdata, ydata, weights) -> loss value
		gradient_function : callable (xdata, ydata, weights) -> gradient shaped like weights
		epoch             : number of update steps
		lr                : step size applied to the gradient

		Returns a list holding the loss measured after each update.
		'''
		errlog = []
		for _ in range(epoch):
			self.W = self.W - lr * gradient_function(xtrain, ytrain, self.W)
			# Log the loss of the freshly updated weights, not the previous ones.
			errlog.append(loss_function(xtrain, ytrain, self.W))
		return errlog

	def predict(self, xtest):
		'''
		Returns the model's predictions (xtest . W) for the rows of xtest.
		'''
		return np.dot(xtest, self.W)


def _read_csv_rows(path):
	'''
	Returns the data rows of the CSV file at `path` as lists of strings,
	skipping the header line and any blank lines.
	'''
	# newline='' lets the csv module do its own newline handling, as its
	# documentation requires for file objects.
	with open(path, 'r', newline='') as f:
		reader = csv.reader(f, delimiter=',')
		next(reader, None)
		# csv.reader yields [] for a blank line (e.g. a trailing one); such a
		# row has no last column to split off, so drop it.
		return [row for row in reader if row]

def read_dataset(trainfile, testfile):
	'''
	Reads the input data from train and test files.

	The first line of each file is treated as a header and skipped; blank lines
	are ignored. In the train file the last column of each row is the target and
	the remaining columns are the features. Every column of the test file is
	kept as a feature.

	Returns Xtrain (N rows of feature strings), Ytrain (N target strings, flat)
	and Xtest (M rows of feature strings) as numpy arrays. Values are not
	converted to numbers here.
	'''
	train_rows = _read_csv_rows(trainfile)
	xtrain = [row[:-1] for row in train_rows]
	ytrain = [row[-1] for row in train_rows]
	xtest = _read_csv_rows(testfile)

	return np.array(xtrain), np.array(ytrain), np.array(xtest)

def preprocess_dataset(xdata, ydata=None):
	'''
	Builds the numeric design matrix used by the regressor.

	Keeps the columns of xdata at indices 2, 3, 4, 7 and 9, converts them to
	float32 and puts a column of ones in front of them, so the first weight
	acts as the bias term.

	xdata : 2-D array with at least 10 columns whose selected entries can be
	        converted to float
	ydata : optional array of targets; converted to float32 when given

	Returns the design matrix alone when ydata is None, otherwise the pair
	(design matrix, converted ydata).
	'''
	selected_columns = [2, 3, 4, 7, 9]
	features = xdata[:, selected_columns].astype('float32')
	# One leading 1 per row: the bias column.
	intercept = np.ones((features.shape[0], 1))
	design = np.hstack((intercept, features))
	if ydata is not None:
		return design, ydata.astype('float32')
	return design

# Maps each value accepted by the --loss option to its
# (loss function, gradient function) pair.
dictionary_of_losses = dict(
	mse=(mean_squared_loss, mean_squared_gradient),
	mae=(mean_absolute_loss, mean_absolute_gradient),
	rmse=(root_mean_squared_loss, root_mean_squared_gradient),
	logcosh=(mean_log_cosh_loss, mean_log_cosh_gradient),
)

def main():
	'''
	Trains a LinearRegressor on the train file and writes its predictions for
	the test file to output.csv and the per-epoch loss values to error.log.

	File locations, loss name, epoch count and learning rate are read from the
	module-level `args` namespace, which is filled in by the argument parser
	before this function is called.
	'''
	raw_xtrain, raw_ytrain, raw_xtest = read_dataset(args.train_file, args.test_file)
	train_features, train_targets = preprocess_dataset(raw_xtrain, raw_ytrain)
	test_features = preprocess_dataset(raw_xtest)

	# One weight per column of the processed feature matrix.
	regressor = LinearRegressor(np.shape(train_features)[1])

	# The loss function is provided by command line argument
	loss_fn, loss_grad = dictionary_of_losses[args.loss]
	history = regressor.train(train_features, train_targets, loss_fn, loss_grad, args.epoch, args.lr)

	# Predictions are truncated to integers and written as absolute values,
	# paired with their row index.
	predictions = regressor.predict(test_features).astype('int')
	rows = [(index, np.absolute(value)) for index, value in enumerate(predictions)]
	np.savetxt("output.csv",rows,delimiter=',',fmt="%d",header="instance (id),count",comments='')
	np.savetxt("error.log",history,delimiter='\n',fmt="%f")


# Script entry point: parse the command line, then run main().
if __name__ == '__main__':

	parser = argparse.ArgumentParser()

	# --loss must be one of the keys of dictionary_of_losses.
	parser.add_argument('--loss', default='mse', choices=['mse','mae','rmse','logcosh'], help='loss function')
	parser.add_argument('--lr', default=1.0, type=float, help='learning rate')
	parser.add_argument('--epoch', default=100, type=int, help='number of epochs')
	parser.add_argument('--train_file', type=str, help='location of the training file')
	parser.add_argument('--test_file', type=str, help='location of the test file')

	# args is assigned at module level on purpose: main() reads it as a global.
	args = parser.parse_args()

	main()
