Commit 13f77136 authored by SHREYANSH JAIN

Merge branch 'revert-6c9f1173' into 'master'

Revert "Merge branch 'test' into 'master'"

See merge request !2
parents 6c9f1173 eed7eff9
import numpy as np
import argparse
import csv
# import matplotlib.pyplot as plt
'''
You are only required to fill the following functions
@@ -42,15 +41,17 @@ def mean_absolute_loss(xdata, ydata, weights):
guess = np.dot(xdata,weights)
samples = np.shape(guess)[0]
err = 0.5*samples*np.sum(np.absolute(ydata-guess))
err = (1/samples)*np.sum(np.absolute(ydata-guess))
return err
raise NotImplementedError
def mean_absolute_gradient(xdata, ydata, weights):
samples = np.shape(xdata)[0]
guess = np.dot(xdata,weights)
gradient = (1/samples)*np.dot(xdata.T,(guess-ydata))
if np.sum(ydata-guess) < 0:
gradient = np.random.randint(0,10,np.shape(weights)[0])
else:
gradient = np.random.randint(-10,0,np.shape(weights)[0])
return gradient
raise NotImplementedError
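# Reference sketch, not part of the commit: the usual subgradient of the
# mean absolute loss (1/n)*sum(|y - Xw|) is (1/n) * X^T sign(Xw - y); the
# random-step update above only preserves the overall sign of the error.
def mean_absolute_gradient_sketch(xdata, ydata, weights):
    guess = np.dot(xdata, weights)
    samples = np.shape(xdata)[0]
    return (1 / samples) * np.dot(xdata.T, np.sign(guess - ydata))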
@@ -59,17 +60,15 @@ def mean_log_cosh_loss(xdata, ydata, weights):
guess = np.dot(xdata,weights)
samples = np.shape(guess)[0]
err = samples*np.sum(np.log(np.cosh(ydata-guess)))
err = (1/samples)*np.sum(np.square(ydata-guess))
return err
raise NotImplementedError
def mean_log_cosh_gradient(xdata, ydata, weights):
guess = np.dot(xdata,weights)
simplerr = np.multiply(2,ydata-guess)
samples = np.shape(guess)[0]
derivative = np.divide(np.exp(simplerr)-1,np.exp(simplerr)+1)
gradient = (1/samples)*np.dot(xdata.T,derivative)
gradient = np.dot(xdata.T,np.tanh(guess-ydata))
return gradient
raise NotImplementedError
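# Reference sketch, not part of the commit: for the log-cosh loss
# (1/n)*sum(log(cosh(y - Xw))) the gradient is (1/n) * X^T tanh(Xw - y),
# since d/dz log(cosh(z)) = tanh(z); the (exp(2a)-1)/(exp(2a)+1) expression
# above is just tanh(a) written out explicitly.
def mean_log_cosh_gradient_sketch(xdata, ydata, weights):
    guess = np.dot(xdata, weights)
    samples = np.shape(xdata)[0]
    return (1 / samples) * np.dot(xdata.T, np.tanh(guess - ydata))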
@@ -92,11 +91,10 @@ def root_mean_squared_gradient(xdata, ydata, weights):
class LinearRegressor:
def __init__(self, dims):
def __init__(self,dims):
self.dims = dims
self.W = np.random.rand(dims)
#self.W = np.random.uniform(low=0.0, high=1.0, size=dims)
self.W = np.zeros(dims)
return
raise NotImplementedError
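# The train/predict bodies are collapsed in this diff. The class below is only
# an assumed plain batch gradient-descent sketch matching how main() calls the
# model (train(x, y, loss_fn, loss_grad, epoch, lr) returning a per-epoch error
# log, predict(x) returning X.W); it is not the committed implementation.
class LinearRegressorSketch:
    def __init__(self, dims):
        self.dims = dims
        self.W = np.zeros(dims)

    def train(self, xtrain, ytrain, loss_function, gradient_function, epoch, lr):
        errlog = []
        for _ in range(epoch):
            # step against the gradient of the chosen loss, log the loss per epoch
            self.W = self.W - lr * gradient_function(xtrain, ytrain, self.W)
            errlog.append(loss_function(xtrain, ytrain, self.W))
        return errlog

    def predict(self, xtest):
        return np.dot(xtest, self.W)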
@@ -142,93 +140,16 @@ def read_dataset(trainfile, testfile):
return np.array(xtrain), np.array(ytrain), np.array(xtest)
def one_hot_encoding(value_list, classes):
res = np.eye(classes)[value_list.reshape(-1)]
return res.reshape(list(value_list.shape)+[classes])
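# Quick illustration, not part of the commit, of the np.eye() trick used in
# one_hot_encoding above: each integer label indexes a row of the identity
# matrix, producing one one-hot row per sample.
# one_hot_encoding(np.array([0, 2, 1]), 3)
# -> array([[1., 0., 0.],
#           [0., 0., 1.],
#           [0., 1., 0.]])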
norm_dict = {}
dictionary_of_classes_for_features = {
2 : 5,
3 : 25,
5: 8,
7: 5
}
dictionary_of_days = {
'Monday' : 1,
'Tuesday': 2,
'Wednesday': 3,
'Thursday' : 4,
'Friday' : 5,
'Saturday': 6,
'Sunday' : 7
}
def slicer(arr, beg, end):
return np.array([i[beg:end] for i in arr]).reshape(-1, 1)
"""
# for normalization of the parameters 'wind speed' and 'humidity', uncomment
def normalize(arr):
arr = arr
if not norm_dict: # build the dictionary once at training time so the same stats are reused at test time
# for i in range(arr.shape[1]):
norm_dict['init'] = [np.min(arr), np.max(arr)]
#norm_dict['init'] = [np.mean(arr), np.std(arr)]
# for i in range(arr.shape[1]):
arr = np.array([(x - norm_dict['init'][0])/(norm_dict['init'][1] - norm_dict['init'][0]) for x in arr]) # min-max
#arr = np.array([(x - norm_dict['init'][0])/(norm_dict['init'][1]) for x in arr]) # standardization
return arr
"""
# ideas to revisit: 4-hour time bands, 1/-1 encoding
# use feature selection and tuning in Jupyter, then apply it back here
def preprocess_dataset(xdata, ydata=None):
# converting weekdays to numeric for one_hot_encoding
"""
# for normalization of the parameters 'wind speed' and 'humidity', uncomment
xdata[:, 10] = normalize(xdata[:, 10].astype('float')) # normalized
xdata[:, 11] = normalize(xdata[:, 11].astype('float'))"""
xdata[:, 5] = [dictionary_of_days[i] for i in xdata[:, 5]]
cat_cols = [2, 3, 5, 7]
for i in cat_cols:
# drop the first 2 encoded columns: the unused 0 column plus one more for C-1 encoding
t = one_hot_encoding(xdata[:, i].astype('int'), dictionary_of_classes_for_features[i])[:, 2:]
xdata = np.concatenate((xdata, t),axis=1)
xdata = np.delete(xdata, cat_cols, 1) # remove the original categorical columns now that they are encoded
xdata = np.delete(xdata, 6, 1)
xdata = np.delete(xdata, 8, 1)
# extracting features from date
month = slicer(xdata[:, 1], 5,7)
t = one_hot_encoding(month[:,0].astype('int'), 13)[:, 2:]
xdata = np.concatenate((xdata, t), axis=1)
date = slicer(xdata[:, 1], 8, 10)
week = np.ceil(date.astype('int') / 7) # week of month
t = one_hot_encoding(week[:,0].astype('int'), 6)[:, 2:]
xdata = np.concatenate((xdata, t), axis=1)
xdata = xdata[:,2:] # dropping first 2 unnecessary columns
print(xdata[0:5])
xdata = xdata.astype('float32')
bias = np.ones((np.shape(xdata)[0],1))
xdata = np.concatenate((bias,xdata),axis=1)
if ydata is None:
return xdata
ydata = ydata.astype('float32')
return xdata,ydata
raise NotImplementedError
xdata = xdata[:,[2,3,4,7,9]]
xdata = xdata.astype('float32')
bias = np.ones((np.shape(xdata)[0],1))
xdata = np.concatenate((bias,xdata),axis=1)
if ydata is None:
return xdata
ydata = ydata.astype('float32')
return xdata,ydata
raise NotImplementedError
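# Illustration, not part of the commit, of the date-feature slicing in
# preprocess_dataset above, assuming column 1 holds a 'YYYY-MM-DD ...'-style
# timestamp string:
# sample = np.array(['2011-03-15 00:00:00'])
# slicer(sample, 5, 7)     # -> [['03']]  month digits
# slicer(sample, 8, 10)    # -> [['15']]  day of month; np.ceil(15 / 7) = 3.0 -> 3rd week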
dictionary_of_losses = {
'mse':(mean_squared_loss, mean_squared_gradient),
@@ -237,51 +158,26 @@ dictionary_of_losses = {
'logcosh':(mean_log_cosh_loss, mean_log_cosh_gradient),
}
"""
# to remove outliers from the wind speed column, uncomment this block
def out(x, std, mean):
if ((x < mean + 2 * std)and (x > mean - 2 * std)):
return 0
else:
return 1
def outlier(xtrain, ytrain, std, mean):
a =[]
for i in xtrain[:, 11].astype('float32'):
a.append(out(i,std, mean))
a = np.array(a)
xdata = np.concatenate((xtrain, a.reshape(-1, 1)), axis=1)
ytrain = np.delete(ytrain, np.argwhere(xdata[:, -1].astype('int') > 0), 0)
xdata = np.delete(xdata, np.argwhere(xdata[:, -1].astype('int') > 0), 0)
xdata = np.delete(xdata, -1, 1)
return (xdata, ytrain)"""
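# Vectorized sketch, not part of the commit, of the disabled 2-sigma filter
# above: keep only the rows whose wind-speed column lies within mean +/- 2*std.
# def drop_wind_speed_outliers(xtrain, ytrain, col=11):
#     speeds = xtrain[:, col].astype('float32')
#     keep = np.abs(speeds - np.mean(speeds)) < 2 * np.std(speeds)
#     return xtrain[keep], ytrain[keep]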
def main():
# You are free to modify the main function as per your requirements.
# Uncomment the below lines and pass the appropriate value
xtrain, ytrain, xtest = read_dataset(args.train_file, args.test_file)
"""
# to remove outliers from the wind speed column, uncomment this block
std = np.std(xtrain[:, 11].astype('float32'))
mean = np.mean(xtrain[:, 11].astype('float32'))
xtrain, ytrain =outlier(xtrain, ytrain, std, mean)"""
xtrainprocessed, ytrainprocessed = preprocess_dataset(xtrain, ytrain)
xtestprocessed = preprocess_dataset(xtest)
model = LinearRegressor(np.shape(xtrainprocessed)[1])
# The loss function is provided by command line argument
loss_fn, loss_grad = dictionary_of_losses[args.loss]
errlog = model.train(xtrainprocessed, ytrainprocessed, loss_fn, loss_grad, args.epoch, args.lr)
ytest = model.predict(xtestprocessed)
ytest = ytest.astype('int')
output = [(i,np.absolute(ytest[i])) for i in range(len(ytest))]
np.savetxt("output.csv",output,delimiter=',',fmt="%d",header="instance (id),count",comments='')
np.savetxt("error.log",errlog,delimiter='\n',fmt="%f")
if __name__ == '__main__':
@@ -296,4 +192,4 @@ if __name__ == '__main__':
args = parser.parse_args()
main()
\ No newline at end of file
main()
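# The argument parsing block is collapsed in this diff; a minimal sketch that
# would supply the attributes main() reads (the flag names and defaults here
# are assumptions, not the committed code):
# parser = argparse.ArgumentParser()
# parser.add_argument('--loss', default='mse', choices=list(dictionary_of_losses))
# parser.add_argument('--lr', type=float, default=0.01)
# parser.add_argument('--epoch', type=int, default=100)
# parser.add_argument('--train_file', type=str)
# parser.add_argument('--test_file', type=str)
# args = parser.parse_args()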