Commit 13f77136 authored by SHREYANSH JAIN

Merge branch 'revert-6c9f1173' into 'master'

Revert "Merge branch 'test' into 'master'"

See merge request !2
parents 6c9f1173 eed7eff9
This diff is collapsed.
import numpy as np import numpy as np
import argparse import argparse
import csv import csv
# import matplotlib.pyplot as plt # import matplotlib.pyplot as plt
''' '''
You are only required to fill the following functions You are only required to fill the following functions
...@@ -42,15 +41,17 @@ def mean_absolute_loss(xdata, ydata, weights): ...@@ -42,15 +41,17 @@ def mean_absolute_loss(xdata, ydata, weights):
guess = np.dot(xdata,weights) guess = np.dot(xdata,weights)
samples = np.shape(guess)[0] samples = np.shape(guess)[0]
err = 0.5*samples*np.sum(np.absolute(ydata-guess)) err = (1/samples)*np.sum(np.absolute(ydata-guess))
return err return err
raise NotImplementedError raise NotImplementedError
def mean_absolute_gradient(xdata, ydata, weights): def mean_absolute_gradient(xdata, ydata, weights):
samples = np.shape(xdata)[0]
guess = np.dot(xdata,weights) guess = np.dot(xdata,weights)
gradient = (1/samples)*np.dot(xdata.T,(guess-ydata)) if np.sum(ydata-guess) < 0:
gradient = np.random.randint(0,10,np.shape(weights)[0])
else:
gradient = np.random.randint(-10,0,np.shape(weights)[0])
return gradient return gradient
raise NotImplementedError raise NotImplementedError
...@@ -59,17 +60,15 @@ def mean_log_cosh_loss(xdata, ydata, weights): ...@@ -59,17 +60,15 @@ def mean_log_cosh_loss(xdata, ydata, weights):
guess = np.dot(xdata,weights) guess = np.dot(xdata,weights)
samples = np.shape(guess)[0] samples = np.shape(guess)[0]
err = samples*np.sum(np.log(np.cosh(ydata-guess))) err = (1/samples)*np.sum(np.square(ydata-guess))
return err return err
raise NotImplementedError raise NotImplementedError
def mean_log_cosh_gradient(xdata, ydata, weights): def mean_log_cosh_gradient(xdata, ydata, weights):
guess = np.dot(xdata,weights) guess = np.dot(xdata,weights)
simplerr = np.multiply(2,ydata-guess)
samples = np.shape(guess)[0] samples = np.shape(guess)[0]
derivative = np.divide(np.exp(simplerr)-1,np.exp(simplerr)+1) gradient = np.dot(xdata.T,np.tanh(guess-ydata))
gradient = (1/samples)*np.dot(xdata.T,derivative)
return gradient return gradient
raise NotImplementedError raise NotImplementedError
...@@ -92,11 +91,10 @@ def root_mean_squared_gradient(xdata, ydata, weights): ...@@ -92,11 +91,10 @@ def root_mean_squared_gradient(xdata, ydata, weights):
class LinearRegressor: class LinearRegressor:
def __init__(self, dims): def __init__(self,dims):
self.dims = dims self.dims = dims
self.W = np.random.rand(dims) self.W = np.zeros(dims)
#self.W = np.random.uniform(low=0.0, high=1.0, size=dims)
return return
raise NotImplementedError raise NotImplementedError
...@@ -142,93 +140,16 @@ def read_dataset(trainfile, testfile): ...@@ -142,93 +140,16 @@ def read_dataset(trainfile, testfile):
return np.array(xtrain), np.array(ytrain), np.array(xtest) return np.array(xtrain), np.array(ytrain), np.array(xtest)
def one_hot_encoding(value_list, classes):
res = np.eye(classes)[value_list.reshape(-1)]
return res.reshape(list(value_list.shape)+[classes])
norm_dict = {}
dictionary_of_classes_for_features = {
2 : 5,
3 : 25,
5: 8,
7: 5
}
dictionary_of_days = {
'Monday' : 1,
'Tuesday': 2,
'Wednesday': 3,
'Thursday' : 4,
'Friday' : 5,
'Saturday': 6,
'Sunday' : 7
}
def slicer(arr, beg, end):
return np.array([i[beg:end] for i in arr]).reshape(-1, 1)
"""
#for normalization of parametes 'wind speed' and 'humidity' uncoment
def normalize(arr):
arr = arr
if not norm_dict: # make dictionary once at training to be used later during test
# for i in range(arr.shape[1]):
norm_dict['init'] = [np.min(arr), np.max(arr)]
#norm_dict['init'] = [np.mean(arr), np.std(arr)]
# for i in range(arr.shape[1]):
arr = np.array([(x - norm_dict['init'][0])/(norm_dict['init'][1] - norm_dict['init'][0]) for x in arr]) # min-max
#arr = np.array([(x - norm_dict['init'][0])/(norm_dict['init'][1]) for x in arr]) # standardization
return arr
"""
# 4 hours band
# 1/-1 encoding
# use feature selection and tuning in Jupyter then apply it back here
def preprocess_dataset(xdata, ydata=None): def preprocess_dataset(xdata, ydata=None):
xdata = xdata[:,[2,3,4,7,9]]
# converting weekdays to numeric for one_hot_encoding xdata = xdata.astype('float32')
""" bias = np.ones((np.shape(xdata)[0],1))
xdata = np.concatenate((bias,xdata),axis=1)
#for normalization of parametes 'wind speed' and 'humidity' uncoment if ydata is None:
xdata[:, 10] = normalize(xdata[:, 10].astype('float'))# normalized return xdata
xdata[:, 11] = normalize(xdata[:, 10].astype('float'))""" ydata = ydata.astype('float32')
xdata[:, 5] = [dictionary_of_days[i] for i in xdata[:, 5]] return xdata,ydata
raise NotImplementedError
cat_cols = [2, 3, 5, 7]
for i in cat_cols:
# dropping 2 columns for C-1 encoding and removing additional 0 column
t = one_hot_encoding(xdata[:, i].astype('int'), dictionary_of_classes_for_features[i])[:, 2:]
xdata = np.concatenate((xdata, t),axis=1)
xdata = np.delete(xdata, cat_cols, 1) # removing useless columns
xdata = np.delete(xdata, 6, 1)
xdata = np.delete(xdata, 8, 1)
# extracting features from date
month = slicer(xdata[:, 1], 5,7)
t = one_hot_encoding(month[:,0].astype('int'), 13)[:, 2:]
xdata = np.concatenate((xdata, t), axis=1)
date = slicer(xdata[:, 1], 8, 10)
week = np.ceil(date.astype('int') / 7) # week of month
t = one_hot_encoding(week[:,0].astype('int'), 6)[:, 2:]
xdata = np.concatenate((xdata, t), axis=1)
xdata = xdata[:,2:] # dropping first 2 unnecessary columns
print(xdata[0:5])
xdata = xdata.astype('float32')
bias = np.ones((np.shape(xdata)[0],1))
xdata = np.concatenate((bias,xdata),axis=1)
if ydata is None:
return xdata
ydata = ydata.astype('float32')
return xdata,ydata
raise NotImplementedError
dictionary_of_losses = { dictionary_of_losses = {
'mse':(mean_squared_loss, mean_squared_gradient), 'mse':(mean_squared_loss, mean_squared_gradient),
...@@ -237,51 +158,26 @@ dictionary_of_losses = { ...@@ -237,51 +158,26 @@ dictionary_of_losses = {
'logcosh':(mean_log_cosh_loss, mean_log_cosh_gradient), 'logcosh':(mean_log_cosh_loss, mean_log_cosh_gradient),
} }
"""
#For outliers removal from wind speed column uncomment
def out(x, std, mean):
if ((x < mean + 2 * std)and (x > mean - 2 * std)):
return 0
else:
return 1
def outlier(xtrain, ytrain, std, mean):
a =[]
for i in xtrain[:, 11].astype('float32'):
a.append(out(i,std, mean))
a = np.array(a)
xdata = np.concatenate((xtrain, a.reshape(-1, 1)), axis=1)
ytrain = np.delete(ytrain, np.argwhere(xdata[:, -1].astype('int') > 0), 0)
xdata = np.delete(xdata, np.argwhere(xdata[:, -1].astype('int') > 0), 0)
xdata = np.delete(xdata, -1, 1)
return (xdata, ytrain)"""
def main(): def main():
# You are free to modify the main function as per your requirements.
# Uncomment the below lines and pass the appropriate value
xtrain, ytrain, xtest = read_dataset(args.train_file, args.test_file) # You are free to modify the main function as per your requirements.
# Uncomment the below lines and pass the appropriate value
""" xtrain, ytrain, xtest = read_dataset(args.train_file, args.test_file)
#For outliers removal from wind speed column uncomment xtrainprocessed, ytrainprocessed = preprocess_dataset(xtrain, ytrain)
std = np.std(xtrain[:, 11].astype('float32')) xtestprocessed = preprocess_dataset(xtest)
mean = np.mean(xtrain[:, 11].astype('float32'))
xtrain, ytrain =outlier(xtrain, ytrain, std, mean)"""
xtrainprocessed, ytrainprocessed = preprocess_dataset(xtrain, ytrain)
xtestprocessed = preprocess_dataset(xtest)
model = LinearRegressor(np.shape(xtrainprocessed)[1]) model = LinearRegressor(np.shape(xtrainprocessed)[1])
# The loss function is provided by command line argument # The loss function is provided by command line argument
loss_fn, loss_grad = dictionary_of_losses[args.loss] loss_fn, loss_grad = dictionary_of_losses[args.loss]
errlog = model.train(xtrainprocessed, ytrainprocessed, loss_fn, loss_grad, args.epoch, args.lr) errlog = model.train(xtrainprocessed, ytrainprocessed, loss_fn, loss_grad, args.epoch, args.lr)
ytest = model.predict(xtestprocessed) ytest = model.predict(xtestprocessed)
ytest = ytest.astype('int') ytest = ytest.astype('int')
output = [(i,np.absolute(ytest[i])) for i in range(len(ytest))] output = [(i,np.absolute(ytest[i])) for i in range(len(ytest))]
np.savetxt("output.csv",output,delimiter=',',fmt="%d",header="instance (id),count",comments='') np.savetxt("output.csv",output,delimiter=',',fmt="%d",header="instance (id),count",comments='')
np.savetxt("error.log",errlog,delimiter='\n',fmt="%f") np.savetxt("error.log",errlog,delimiter='\n',fmt="%f")
if __name__ == '__main__': if __name__ == '__main__':
...@@ -296,4 +192,4 @@ if __name__ == '__main__': ...@@ -296,4 +192,4 @@ if __name__ == '__main__':
args = parser.parse_args() args = parser.parse_args()
main() main()
\ No newline at end of file
This diff is collapsed.
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment