Commit b311859a authored by SHREYANSH JAIN

final submission for moodle checkpoint

parent acf2bc51
@@ -15,7 +15,7 @@ with open('logcosh.log','r') as csvfile:
 plt.plot(x,y, label='LOGCOSH')
 plt.xlabel('epoch')
-plt.ylabel('Error')
+plt.ylabel('mean_squared_loss')
 x = []
@@ -47,7 +47,22 @@ with open('rmse.log','r') as csvfile:
 plt.plot(x,y, label='RMSE')
-plt.plot(x,y, label='')
+x = []
+y = []
+i=1
+with open('mse.log','r') as csvfile:
+    for row in csvfile:
+        x.append(i)
+        i+=1
+        y.append(float(row[:-1]))
+plt.plot(x,y, label='MSE')
+# plt.plot(x,y, label='')
 plt.legend()
 plt.show()
\ No newline at end of file
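
Note: the second hunk above overlays an MSE curve read from mse.log alongside the existing LOGCOSH and RMSE curves. For reference, a minimal standalone sketch of the same pattern, assuming each *.log file holds one loss value per line (as the loop in the hunk implies):

import matplotlib.pyplot as plt

# plot one loss curve per log file; each file holds one float per line
def load_curve(path):
    with open(path) as f:
        return [float(line) for line in f if line.strip()]

for name in ('logcosh', 'rmse', 'mse'):
    y = load_curve(name + '.log')
    plt.plot(range(1, len(y) + 1), y, label=name.upper())

plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend()
plt.show()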
@@ -2,23 +2,10 @@ import numpy as np
 import argparse
 import csv
 import warnings
+# import matplotlib.pyplot as plt
 '''
-You are only required to fill the following functions
-mean_squared_loss
-mean_squared_gradient
-mean_absolute_loss
-mean_absolute_gradient
-mean_log_cosh_loss
-mean_log_cosh_gradient
-root_mean_squared_loss
-root_mean_squared_gradient
-preprocess_dataset
-main
-Don't modify any other functions or commandline arguments because autograder will be used
-Don't modify function declaration (arguments)
+Commented portion may not help much in
+optimization but will help in visualization !!
 '''

 def mean_squared_loss(xdata, ydata, weights):
@@ -49,11 +36,13 @@ def mean_absolute_loss(xdata, ydata, weights):
 def mean_absolute_gradient(xdata, ydata, weights):
     guess = np.dot(xdata,weights)
+    samples = np.shape(guess)[0]
     if np.sum(ydata-guess) < 0:
-        gradient = np.random.randint(0,10,np.shape(weights)[0])
+        gradient = xdata.sum(axis=0)/samples
     else:
-        gradient = np.random.randint(-10,0,np.shape(weights)[0])
+        gradient = -xdata.sum(axis=0)/samples
-    return gradient
+    return gradient.T
     raise NotImplementedError
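
Note: the new mean_absolute_gradient picks one global sign from np.sum(ydata-guess) and applies it to every sample. The usual MAE subgradient applies the sign per sample, i.e. -(1/N) * X^T sign(y - Xw). A minimal sketch of that per-sample form, for comparison only (not the committed implementation):

import numpy as np

def mean_absolute_gradient_per_sample(xdata, ydata, weights):
    # subgradient of (1/N) * sum_i |y_i - x_i.w|:
    # -(1/N) * X^T sign(y - X w), sign taken per sample
    guess = np.dot(xdata, weights)
    samples = np.shape(guess)[0]
    return -np.dot(xdata.T, np.sign(ydata - guess)) / samples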
@@ -112,22 +101,18 @@ class LinearRegressor:
         for iterations in range(epoch):
             self.W = self.W - lr*gradient_function(xtrain,ytrain,self.W)
             errlog.append(loss_function(xtrain,ytrain,self.W))
+            # errlog.append(mean_squared_loss(xtrain,ytrain,self.W))
         return errlog
         raise NotImplementedError

     def predict(self, xtest):
-        # This returns your prediction on xtest
         return np.dot(xtest,self.W)
         raise NotImplementedError

 def read_dataset(trainfile, testfile):
-    '''
-    Reads the input data from train and test files and
-    Returns the matrices Xtrain : [N X D] and Ytrain : [N X 1] and Xtest : [M X D]
-    where D is number of features and N is the number of train rows and M is the number of test rows
-    '''
     xtrain = []
     ytrain = []
     xtest = []
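
Note: the train loop above is plain full-batch gradient descent, W <- W - lr * grad(W), with the loss logged after every update for the plots earlier in this commit. A self-contained sketch of the same loop with an MSE loss/gradient pair (names here are illustrative, not the module's API):

import numpy as np

def mse_loss(X, y, w):
    r = np.dot(X, w) - y
    return np.mean(r ** 2)

def mse_gradient(X, y, w):
    # gradient of (1/N) * ||Xw - y||^2
    return 2.0 * np.dot(X.T, np.dot(X, w) - y) / X.shape[0]

def train(X, y, epochs=100, lr=0.01):
    w = np.zeros(X.shape[1])
    errlog = []
    for _ in range(epochs):
        w = w - lr * mse_gradient(X, y, w)   # full-batch update, as in the diff
        errlog.append(mse_loss(X, y, w))
    return w, errlog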
@@ -222,7 +207,7 @@ def preprocess_dataset(xdata, ydata=None):
     xdata = xdata[:,2:] # dropping first 2 unnecessary columns
     xdata = xdata.astype('float32')
-    bias = np.ones((np.shape(xdata)[0],1))
+    bias = np.ones((np.shape(xdata)[0],1)) # adding Bias in feature Matrix
     xdata = np.concatenate((bias,xdata),axis=1)
     if ydata is None:
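
Note: the comment added above documents the intercept trick: prepending a column of ones so that np.dot(xdata, W) includes a constant term, with W[0] acting as the bias. A toy illustration (values are made up):

import numpy as np

X = np.array([[3.0, 5.0],
              [1.0, 2.0]])            # two samples, two features
bias = np.ones((X.shape[0], 1))       # column of ones
Xb = np.concatenate((bias, X), axis=1)

W = np.array([0.5, 2.0, -1.0])        # W[0] is the intercept
print(np.dot(Xb, W))                  # same as np.dot(X, W[1:]) + W[0]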
@@ -280,18 +265,11 @@ def main():
     errlog = model.train(xtrainprocessed, ytrainprocessed, loss_fn, loss_grad, args.epoch, args.lr)
     ytest = model.predict(xtestprocessed)
     ytest = ytest.astype('int')
-    output = [(i,np.absolute(ytest[i])) for i in range(len(ytest))]
-    np.savetxt("output.csv",output,delimiter=',',fmt="%d",header="instance (id),count",comments='')
-    np.savetxt("error.log",errlog,delimiter='\n',fmt="%f")
-    # x,y,z = np.array([[ 1, 0, 2, -3], [ 1, -1, 0, -3], [-2, -5, 1, -3], [ 0, -5, 3, -3], [ 0, -4, 3, -2]]),np.array( [-2, 1, 1, 2, 0]),np.array( [ 1, 0, -2, -1])
-    # print(mean_absolute_loss(x,y,z))
-    # print(mean_absolute_gradient(x,y,z))
-    # print(mean_squared_loss(x,y,z))
-    # print(mean_squared_gradient(x,y,z))
-    # print(root_mean_squared_loss(x,y,z))
-    # print(root_mean_squared_gradient(x,y,z))
-    # print(mean_log_cosh_loss(x,y,z))
-    # print(mean_log_cosh_gradient(x,y,z))
+    ytest = [np.absolute(i) for i in ytest]
+    print(ytest)
+    output = [(i,ytest[i]) for i in range(len(ytest))]
+    np.savetxt("prediction.csv",output,delimiter=',',fmt="%d",header="instance (id),count",comments='')
+    # np.savetxt("error.log",errlog,delimiter='\n',fmt="%f")

 if __name__ == '__main__':
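
Note: the rewritten output step takes np.absolute of each predicted count before writing prediction.csv, so a negative prediction -k is reported as +k. If the intent is only to keep counts non-negative, clipping at zero is the more common choice; a sketch of that alternative (not what the commit does):

import numpy as np

ytest = np.array([12, -3, 0, 47])     # example integer predictions
ytest = np.clip(ytest, 0, None)       # floor negatives at 0 instead of reflecting them
output = list(enumerate(ytest))
np.savetxt("prediction.csv", output, delimiter=',', fmt="%d",
           header="instance (id),count", comments='')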
@@ -1264,7 +1264,7 @@ instance (id),count
 1262,485
 1263,217
 1264,192
-1265,89
+1265,88
 1266,307
 1267,255
 1268,209