Commit eed7eff9 authored by SHREYANSH JAIN's avatar SHREYANSH JAIN

Revert "Merge branch 'test' into 'master'"

This reverts merge request !1
parent 6c9f1173
This diff is collapsed.
import numpy as np
import argparse
import csv
# import matplotlib.pyplot as plt
'''
You are only required to fill the following functions
......@@ -42,15 +41,17 @@ def mean_absolute_loss(xdata, ydata, weights):
guess = np.dot(xdata,weights)
samples = np.shape(guess)[0]
err = 0.5*samples*np.sum(np.absolute(ydata-guess))
err = (1/samples)*np.sum(np.absolute(ydata-guess))
return err
raise NotImplementedError
def mean_absolute_gradient(xdata, ydata, weights):
samples = np.shape(xdata)[0]
guess = np.dot(xdata,weights)
gradient = (1/samples)*np.dot(xdata.T,(guess-ydata))
if np.sum(ydata-guess) < 0:
gradient = np.random.randint(0,10,np.shape(weights)[0])
else:
gradient = np.random.randint(-10,0,np.shape(weights)[0])
return gradient
raise NotImplementedError
......@@ -59,17 +60,15 @@ def mean_log_cosh_loss(xdata, ydata, weights):
guess = np.dot(xdata,weights)
samples = np.shape(guess)[0]
err = samples*np.sum(np.log(np.cosh(ydata-guess)))
err = (1/samples)*np.sum(np.square(ydata-guess))
return err
raise NotImplementedError
def mean_log_cosh_gradient(xdata, ydata, weights):
guess = np.dot(xdata,weights)
simplerr = np.multiply(2,ydata-guess)
samples = np.shape(guess)[0]
derivative = np.divide(np.exp(simplerr)-1,np.exp(simplerr)+1)
gradient = (1/samples)*np.dot(xdata.T,derivative)
gradient = np.dot(xdata.T,np.tanh(guess-ydata))
return gradient
raise NotImplementedError
......@@ -92,11 +91,10 @@ def root_mean_squared_gradient(xdata, ydata, weights):
class LinearRegressor:
def __init__(self, dims):
def __init__(self,dims):
self.dims = dims
self.W = np.random.rand(dims)
#self.W = np.random.uniform(low=0.0, high=1.0, size=dims)
self.W = np.zeros(dims)
return
raise NotImplementedError
......@@ -142,93 +140,16 @@ def read_dataset(trainfile, testfile):
return np.array(xtrain), np.array(ytrain), np.array(xtest)
def one_hot_encoding(value_list, classes):
res = np.eye(classes)[value_list.reshape(-1)]
return res.reshape(list(value_list.shape)+[classes])
norm_dict = {}
dictionary_of_classes_for_features = {
2 : 5,
3 : 25,
5: 8,
7: 5
}
dictionary_of_days = {
'Monday' : 1,
'Tuesday': 2,
'Wednesday': 3,
'Thursday' : 4,
'Friday' : 5,
'Saturday': 6,
'Sunday' : 7
}
def slicer(arr, beg, end):
return np.array([i[beg:end] for i in arr]).reshape(-1, 1)
"""
#for normalization of parametes 'wind speed' and 'humidity' uncoment
def normalize(arr):
arr = arr
if not norm_dict: # make dictionary once at training to be used later during test
# for i in range(arr.shape[1]):
norm_dict['init'] = [np.min(arr), np.max(arr)]
#norm_dict['init'] = [np.mean(arr), np.std(arr)]
# for i in range(arr.shape[1]):
arr = np.array([(x - norm_dict['init'][0])/(norm_dict['init'][1] - norm_dict['init'][0]) for x in arr]) # min-max
#arr = np.array([(x - norm_dict['init'][0])/(norm_dict['init'][1]) for x in arr]) # standardization
return arr
"""
# 4 hours band
# 1/-1 encoding
# use feature selection and tuning in Jupyter then apply it back here
def preprocess_dataset(xdata, ydata=None):
# converting weekdays to numeric for one_hot_encoding
"""
#for normalization of parametes 'wind speed' and 'humidity' uncoment
xdata[:, 10] = normalize(xdata[:, 10].astype('float'))# normalized
xdata[:, 11] = normalize(xdata[:, 10].astype('float'))"""
xdata[:, 5] = [dictionary_of_days[i] for i in xdata[:, 5]]
cat_cols = [2, 3, 5, 7]
for i in cat_cols:
# dropping 2 columns for C-1 encoding and removing additional 0 column
t = one_hot_encoding(xdata[:, i].astype('int'), dictionary_of_classes_for_features[i])[:, 2:]
xdata = np.concatenate((xdata, t),axis=1)
xdata = np.delete(xdata, cat_cols, 1) # removing useless columns
xdata = np.delete(xdata, 6, 1)
xdata = np.delete(xdata, 8, 1)
# extracting features from date
month = slicer(xdata[:, 1], 5,7)
t = one_hot_encoding(month[:,0].astype('int'), 13)[:, 2:]
xdata = np.concatenate((xdata, t), axis=1)
date = slicer(xdata[:, 1], 8, 10)
week = np.ceil(date.astype('int') / 7) # week of month
t = one_hot_encoding(week[:,0].astype('int'), 6)[:, 2:]
xdata = np.concatenate((xdata, t), axis=1)
xdata = xdata[:,2:] # dropping first 2 unnecessary columns
print(xdata[0:5])
xdata = xdata.astype('float32')
bias = np.ones((np.shape(xdata)[0],1))
xdata = np.concatenate((bias,xdata),axis=1)
if ydata is None:
return xdata
ydata = ydata.astype('float32')
return xdata,ydata
raise NotImplementedError
xdata = xdata[:,[2,3,4,7,9]]
xdata = xdata.astype('float32')
bias = np.ones((np.shape(xdata)[0],1))
xdata = np.concatenate((bias,xdata),axis=1)
if ydata is None:
return xdata
ydata = ydata.astype('float32')
return xdata,ydata
raise NotImplementedError
dictionary_of_losses = {
'mse':(mean_squared_loss, mean_squared_gradient),
......@@ -237,51 +158,26 @@ dictionary_of_losses = {
'logcosh':(mean_log_cosh_loss, mean_log_cosh_gradient),
}
"""
#For outliers removal from wind speed column uncomment
def out(x, std, mean):
if ((x < mean + 2 * std)and (x > mean - 2 * std)):
return 0
else:
return 1
def outlier(xtrain, ytrain, std, mean):
a =[]
for i in xtrain[:, 11].astype('float32'):
a.append(out(i,std, mean))
a = np.array(a)
xdata = np.concatenate((xtrain, a.reshape(-1, 1)), axis=1)
ytrain = np.delete(ytrain, np.argwhere(xdata[:, -1].astype('int') > 0), 0)
xdata = np.delete(xdata, np.argwhere(xdata[:, -1].astype('int') > 0), 0)
xdata = np.delete(xdata, -1, 1)
return (xdata, ytrain)"""
def main():
# You are free to modify the main function as per your requirements.
# Uncomment the below lines and pass the appropriate value
xtrain, ytrain, xtest = read_dataset(args.train_file, args.test_file)
# You are free to modify the main function as per your requirements.
# Uncomment the below lines and pass the appropriate value
"""
#For outliers removal from wind speed column uncomment
std = np.std(xtrain[:, 11].astype('float32'))
mean = np.mean(xtrain[:, 11].astype('float32'))
xtrain, ytrain =outlier(xtrain, ytrain, std, mean)"""
xtrainprocessed, ytrainprocessed = preprocess_dataset(xtrain, ytrain)
xtestprocessed = preprocess_dataset(xtest)
xtrain, ytrain, xtest = read_dataset(args.train_file, args.test_file)
xtrainprocessed, ytrainprocessed = preprocess_dataset(xtrain, ytrain)
xtestprocessed = preprocess_dataset(xtest)
model = LinearRegressor(np.shape(xtrainprocessed)[1])
model = LinearRegressor(np.shape(xtrainprocessed)[1])
# The loss function is provided by command line argument
loss_fn, loss_grad = dictionary_of_losses[args.loss]
# The loss function is provided by command line argument
loss_fn, loss_grad = dictionary_of_losses[args.loss]
errlog = model.train(xtrainprocessed, ytrainprocessed, loss_fn, loss_grad, args.epoch, args.lr)
ytest = model.predict(xtestprocessed)
ytest = ytest.astype('int')
output = [(i,np.absolute(ytest[i])) for i in range(len(ytest))]
np.savetxt("output.csv",output,delimiter=',',fmt="%d",header="instance (id),count",comments='')
np.savetxt("error.log",errlog,delimiter='\n',fmt="%f")
errlog = model.train(xtrainprocessed, ytrainprocessed, loss_fn, loss_grad, args.epoch, args.lr)
ytest = model.predict(xtestprocessed)
ytest = ytest.astype('int')
output = [(i,np.absolute(ytest[i])) for i in range(len(ytest))]
np.savetxt("output.csv",output,delimiter=',',fmt="%d",header="instance (id),count",comments='')
np.savetxt("error.log",errlog,delimiter='\n',fmt="%f")
if __name__ == '__main__':
......@@ -296,4 +192,4 @@ if __name__ == '__main__':
args = parser.parse_args()
main()
\ No newline at end of file
main()
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment