Commit e0cb7c11 authored by desiredeveloper's avatar desiredeveloper

initial commit

parents
File added
import numpy as np
import argparse
import csv
'''
You are only required to fill the following functions
mean_squared_loss
mean_squared_gradient
mean_absolute_loss
mean_absolute_gradient
mean_log_cosh_loss
mean_log_cosh_gradient
root_mean_squared_loss
root_mean_squared_gradient
preprocess_dataset
main
Don't modify any other functions or commandline arguments because autograder will be used
Don't modify function declaration (arguments)
'''
def mean_squared_loss(xdata, ydata, weights):
    '''
    weights = weight vector [D X 1]
    xdata = input feature matrix [N X D]
    ydata = output values [N X 1]
    Return the mean squared loss: mean over N of (Xw - y)^2.
    '''
    # reshape(-1, 1) makes the residual well-defined even if ydata
    # arrives as a flat [N] vector instead of [N X 1].
    error = xdata @ weights - ydata.reshape(-1, 1)
    return np.mean(error ** 2)
def mean_squared_gradient(xdata, ydata, weights):
    '''
    weights = weight vector [D X 1]
    xdata = input feature matrix [N X D]
    ydata = output values [N X 1]
    Return the mean squared gradient [D X 1]:
    d/dw mean((Xw - y)^2) = 2/N * X^T (Xw - y)
    '''
    n = xdata.shape[0]
    error = xdata @ weights - ydata.reshape(-1, 1)
    return 2.0 * (xdata.T @ error) / n
def mean_absolute_loss(xdata, ydata, weights):
    '''
    weights = weight vector [D X 1]
    xdata = input feature matrix [N X D]
    ydata = output values [N X 1]
    Return the mean absolute loss: mean over N of |Xw - y|.
    '''
    error = xdata @ weights - ydata.reshape(-1, 1)
    return np.mean(np.abs(error))
def mean_absolute_gradient(xdata, ydata, weights):
    '''
    weights = weight vector [D X 1]
    xdata = input feature matrix [N X D]
    ydata = output values [N X 1]
    Return the mean absolute (sub)gradient [D X 1]:
    d/dw mean(|Xw - y|) = 1/N * X^T sign(Xw - y)
    (sign(0) = 0 is a valid subgradient at the kink).
    '''
    n = xdata.shape[0]
    error = xdata @ weights - ydata.reshape(-1, 1)
    return (xdata.T @ np.sign(error)) / n
def mean_log_cosh_loss(xdata, ydata, weights):
    '''
    weights = weight vector [D X 1]
    xdata = input feature matrix [N X D]
    ydata = output values [N X 1]
    Return the mean log-cosh loss: mean over N of log(cosh(Xw - y)).
    '''
    error = xdata @ weights - ydata.reshape(-1, 1)
    # log(cosh(e)) = logaddexp(e, -e) - log(2): avoids overflow of
    # cosh(e) for large |e|, unlike np.log(np.cosh(e)).
    return np.mean(np.logaddexp(error, -error) - np.log(2.0))
def mean_log_cosh_gradient(xdata, ydata, weights):
    '''
    weights = weight vector [D X 1]
    xdata = input feature matrix [N X D]
    ydata = output values [N X 1]
    Return the mean log-cosh gradient [D X 1]:
    d/dw mean(log(cosh(Xw - y))) = 1/N * X^T tanh(Xw - y)
    '''
    n = xdata.shape[0]
    error = xdata @ weights - ydata.reshape(-1, 1)
    return (xdata.T @ np.tanh(error)) / n
def root_mean_squared_loss(xdata, ydata, weights):
    '''
    weights = weight vector [D X 1]
    xdata = input feature matrix [N X D]
    ydata = output values [N X 1]
    Return the root mean squared loss: sqrt(mean((Xw - y)^2)).
    '''
    # Computed inline (not via mean_squared_loss) so this function is
    # self-contained even while sibling stubs are unimplemented.
    error = xdata @ weights - ydata.reshape(-1, 1)
    return np.sqrt(np.mean(error ** 2))
def root_mean_squared_gradient(xdata, ydata, weights):
    '''
    weights = weight vector [D X 1]
    xdata = input feature matrix [N X D]
    ydata = output values [N X 1]
    Return the root mean squared gradient [D X 1]:
    d/dw sqrt(mean((Xw - y)^2)) = X^T (Xw - y) / (N * rmse)
    '''
    n = xdata.shape[0]
    error = xdata @ weights - ydata.reshape(-1, 1)
    rmse = np.sqrt(np.mean(error ** 2))
    # At a perfect fit the derivative is 0/0; return the zero
    # subgradient instead of dividing by zero.
    if rmse == 0.0:
        return np.zeros_like(weights)
    return (xdata.T @ error) / (n * rmse)
class LinearRegressor:
    def __init__(self, dims):
        '''
        dims = number of input features D
        Initialises the weight vector [D X 1] to zeros. An intercept, if
        wanted, should be supplied by the caller as an extra all-ones
        feature column (as preprocess_dataset is expected to do).
        '''
        self.dims = dims
        self.weights = np.zeros((dims, 1))

    def train(self, xtrain, ytrain, loss_function, gradient_function, epoch=100, lr=1.0):
        '''
        xtrain = input feature matrix [N X D]
        ytrain = output values [N X 1]
        loss_function = loss function name for linear regression training
        gradient_function = gradient name of loss function
        epoch = number of full-batch gradient-descent steps
        lr = learning rate
        Learns self.weights [D X 1] by full-batch gradient descent.
        '''
        for _ in range(epoch):
            # w <- w - lr * dL/dw (gradient_function returns [D X 1])
            grad = gradient_function(xtrain, ytrain, self.weights)
            self.weights = self.weights - lr * grad

    def predict(self, xtest):
        # Linear prediction X @ w -> [M X 1].
        return xtest @ self.weights
def read_dataset(trainfile, testfile):
    '''
    Reads the input data from train and test files and
    Returns the matrices Xtrain : [N X D] and Ytrain : [N X 1] and Xtest : [M X D]
    where D is number of features and N is the number of train rows and M is the number of test rows
    '''
    def _read_rows(path):
        # Load every data row of a CSV, skipping the header line.
        with open(path, 'r') as fh:
            rdr = csv.reader(fh, delimiter=',')
            next(rdr, None)
            return [row for row in rdr]

    train_rows = _read_rows(trainfile)
    test_rows = _read_rows(testfile)
    # Last train column is the target; everything before it is a feature.
    xtrain = [row[:-1] for row in train_rows]
    ytrain = [row[-1] for row in train_rows]
    return np.array(xtrain), np.array(ytrain), np.array(test_rows)
def preprocess_dataset(xdata, ydata=None):
    '''
    xdata = input feature matrix [N X D]
    ydata = output values [N X 1]
    Convert data xdata, ydata obtained from read_dataset() to a usable format by loss function
    The ydata argument is optional so this function must work for the both the calls
    xtrain_processed, ytrain_processed = preprocess_dataset(xtrain,ytrain)
    xtest_processed = preprocess_dataset(xtest)

    Processing applied:
      * columns that cannot be parsed as floats (e.g. date strings) are dropped
      * each remaining column is z-score standardised
      * a leading all-ones bias column is prepended so the model can learn
        an intercept
    Returns xprocessed [N X D'+1], or (xprocessed, yprocessed) when ydata
    is given, with yprocessed as a float [N X 1] column.

    NOTE(review): the mean/std are computed per call, so train and test
    sets are scaled independently; for best accuracy, reuse the training
    statistics on the test set.
    '''
    xdata = np.asarray(xdata)
    columns = []
    for j in range(xdata.shape[1]):
        try:
            columns.append(xdata[:, j].astype(np.float64))
        except ValueError:
            # Non-numeric column: ignore it (allowed per the NOTE above).
            continue
    if columns:
        x = np.column_stack(columns)
        mean = x.mean(axis=0)
        std = x.std(axis=0)
        std[std == 0.0] = 1.0  # constant columns: avoid division by zero
        x = (x - mean) / std
    else:
        x = np.empty((xdata.shape[0], 0))
    x = np.hstack([np.ones((x.shape[0], 1)), x])
    if ydata is None:
        return x
    y = np.asarray(ydata, dtype=np.float64).reshape(-1, 1)
    return x, y
# Maps each value of the --loss command-line choice to its
# (loss function, gradient function) pair used by main().
dictionary_of_losses = {
    'mse':(mean_squared_loss, mean_squared_gradient),
    'mae':(mean_absolute_loss, mean_absolute_gradient),
    'rmse':(root_mean_squared_loss, root_mean_squared_gradient),
    'logcosh':(mean_log_cosh_loss, mean_log_cosh_gradient),
}
def main():
    '''
    End-to-end driver: read the train/test CSVs, preprocess them, train a
    linear regressor with the loss selected on the command line, and print
    the test-set predictions in the "instance (id),count" submission format.
    Reads the module-level `args` namespace populated in __main__.
    '''
    xtrain, ytrain, xtest = read_dataset(args.train_file, args.test_file)
    xtrainprocessed, ytrainprocessed = preprocess_dataset(xtrain, ytrain)
    xtestprocessed = preprocess_dataset(xtest)
    # Dimension includes the bias column added by preprocess_dataset.
    model = LinearRegressor(xtrainprocessed.shape[1])
    # The loss function is provided by command line argument
    loss_fn, loss_grad = dictionary_of_losses[args.loss]
    model.train(xtrainprocessed, ytrainprocessed, loss_fn, loss_grad, args.epoch, args.lr)
    ytest = model.predict(xtestprocessed)
    # Counts cannot be negative; round and clamp at zero for output.
    print('instance (id),count')
    for idx, value in enumerate(np.ravel(ytest)):
        print('%d,%d' % (idx, max(0, int(round(float(value))))))
if __name__ == '__main__':
    # Command-line interface used by the autograder; flag names, choices
    # and defaults must stay as-is.
    parser = argparse.ArgumentParser()
    parser.add_argument('--loss', default='mse', choices=['mse','mae','rmse','logcosh'], help='loss function')
    parser.add_argument('--lr', default=1.0, type=float, help='learning rate')
    parser.add_argument('--epoch', default=100, type=int, help='number of epochs')
    parser.add_argument('--train_file', type=str, help='location of the training file')
    parser.add_argument('--test_file', type=str, help='location of the test file')
    # `args` is deliberately module-level so main() can read it.
    args = parser.parse_args()
    main()
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
instance (id),count
0,8
1,466
2,176
3,58
4,288
5,795
6,1
7,292
8,427
9,73
10,216
11,11
12,171
13,238
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment