Commit 13f77136 authored by SHREYANSH JAIN

Merge branch 'revert-6c9f1173' into 'master'

Revert "Merge branch 'test' into 'master'"

See merge request !2
parents 6c9f1173 eed7eff9
import numpy as np
import argparse
import csv
# import matplotlib.pyplot as plt
'''
You are only required to fill the following functions
@@ -42,15 +41,17 @@ def mean_absolute_loss(xdata, ydata, weights):
guess = np.dot(xdata,weights)
samples = np.shape(guess)[0]
err = 0.5*samples*np.sum(np.absolute(ydata-guess))
err = (1/samples)*np.sum(np.absolute(ydata-guess))
return err
raise NotImplementedError
def mean_absolute_gradient(xdata, ydata, weights):
samples = np.shape(xdata)[0]
guess = np.dot(xdata,weights)
gradient = (1/samples)*np.dot(xdata.T,(guess-ydata))
if np.sum(ydata-guess) < 0:
gradient = np.random.randint(0,10,np.shape(weights)[0])
else:
gradient = np.random.randint(-10,0,np.shape(weights)[0])
return gradient
raise NotImplementedError
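# Reference sketch, not part of the commit: the usual subgradient of the
# mean absolute loss (1/n)*sum(|y - Xw|) is (1/n) * X^T sign(Xw - y); the
# random-step update above only preserves the overall sign of the error.
def mean_absolute_gradient_sketch(xdata, ydata, weights):
    guess = np.dot(xdata, weights)
    samples = np.shape(xdata)[0]
    return (1 / samples) * np.dot(xdata.T, np.sign(guess - ydata))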
@@ -59,17 +60,15 @@ def mean_log_cosh_loss(xdata, ydata, weights):
guess = np.dot(xdata,weights)
samples = np.shape(guess)[0]
err = samples*np.sum(np.log(np.cosh(ydata-guess)))
err = (1/samples)*np.sum(np.square(ydata-guess))
return err
raise NotImplementedError
def mean_log_cosh_gradient(xdata, ydata, weights):
guess = np.dot(xdata,weights)
simplerr = np.multiply(2,ydata-guess)
samples = np.shape(guess)[0]
derivative = np.divide(np.exp(simplerr)-1,np.exp(simplerr)+1)
gradient = (1/samples)*np.dot(xdata.T,derivative)
gradient = np.dot(xdata.T,np.tanh(guess-ydata))
return gradient
raise NotImplementedError
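# Reference sketch, not part of the commit: for the log-cosh loss
# (1/n)*sum(log(cosh(y - Xw))) the gradient is (1/n) * X^T tanh(Xw - y),
# since d/dz log(cosh(z)) = tanh(z); the (exp(2a)-1)/(exp(2a)+1) expression
# above is just tanh(a) written out explicitly.
def mean_log_cosh_gradient_sketch(xdata, ydata, weights):
    guess = np.dot(xdata, weights)
    samples = np.shape(xdata)[0]
    return (1 / samples) * np.dot(xdata.T, np.tanh(guess - ydata))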
@@ -92,11 +91,10 @@ def root_mean_squared_gradient(xdata, ydata, weights):
class LinearRegressor:
def __init__(self, dims):
def __init__(self,dims):
self.dims = dims
self.W = np.random.rand(dims)
#self.W = np.random.uniform(low=0.0, high=1.0, size=dims)
self.W = np.zeros(dims)
return
raise NotImplementedError
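# The train/predict bodies are collapsed in this diff. The class below is only
# an assumed plain batch gradient-descent sketch matching how main() calls the
# model (train(x, y, loss_fn, loss_grad, epoch, lr) returning a per-epoch error
# log, predict(x) returning X.W); it is not the committed implementation.
class LinearRegressorSketch:
    def __init__(self, dims):
        self.dims = dims
        self.W = np.zeros(dims)

    def train(self, xtrain, ytrain, loss_function, gradient_function, epoch, lr):
        errlog = []
        for _ in range(epoch):
            # step against the gradient of the chosen loss, log the loss per epoch
            self.W = self.W - lr * gradient_function(xtrain, ytrain, self.W)
            errlog.append(loss_function(xtrain, ytrain, self.W))
        return errlog

    def predict(self, xtest):
        return np.dot(xtest, self.W)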
@@ -142,93 +140,16 @@ def read_dataset(trainfile, testfile):
return np.array(xtrain), np.array(ytrain), np.array(xtest)
def one_hot_encoding(value_list, classes):
res = np.eye(classes)[value_list.reshape(-1)]
return res.reshape(list(value_list.shape)+[classes])
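# Quick illustration, not part of the commit, of the np.eye() trick used in
# one_hot_encoding above: each integer label indexes a row of the identity
# matrix, producing one one-hot row per sample.
# one_hot_encoding(np.array([0, 2, 1]), 3)
# -> array([[1., 0., 0.],
#           [0., 0., 1.],
#           [0., 1., 0.]])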
norm_dict = {}
dictionary_of_classes_for_features = {
2 : 5,
3 : 25,
5: 8,
7: 5
}
dictionary_of_days = {
'Monday' : 1,
'Tuesday': 2,
'Wednesday': 3,
'Thursday' : 4,
'Friday' : 5,
'Saturday': 6,
'Sunday' : 7
}
def slicer(arr, beg, end):
return np.array([i[beg:end] for i in arr]).reshape(-1, 1)
"""
# for normalization of the parameters 'wind speed' and 'humidity', uncomment
def normalize(arr):
arr = arr
if not norm_dict: # build the dictionary once at training time so the same stats are reused at test time
# for i in range(arr.shape[1]):
norm_dict['init'] = [np.min(arr), np.max(arr)]
#norm_dict['init'] = [np.mean(arr), np.std(arr)]
# for i in range(arr.shape[1]):
arr = np.array([(x - norm_dict['init'][0])/(norm_dict['init'][1] - norm_dict['init'][0]) for x in arr]) # min-max
#arr = np.array([(x - norm_dict['init'][0])/(norm_dict['init'][1]) for x in arr]) # standardization
return arr
"""
# ideas to revisit: 4-hour time bands, 1/-1 encoding
# use feature selection and tuning in Jupyter, then apply it back here
def preprocess_dataset(xdata, ydata=None):
# converting weekdays to numeric for one_hot_encoding
"""
# for normalization of the parameters 'wind speed' and 'humidity', uncomment
xdata[:, 10] = normalize(xdata[:, 10].astype('float')) # normalized
xdata[:, 11] = normalize(xdata[:, 11].astype('float'))"""
xdata[:, 5] = [dictionary_of_days[i] for i in xdata[:, 5]]
cat_cols = [2, 3, 5, 7]
for i in cat_cols:
# drop the first 2 encoded columns: the unused 0 column plus one more for C-1 encoding
t = one_hot_encoding(xdata[:, i].astype('int'), dictionary_of_classes_for_features[i])[:, 2:]
xdata = np.concatenate((xdata, t),axis=1)
xdata = np.delete(xdata, cat_cols, 1) # remove the original categorical columns now that they are encoded
xdata = np.delete(xdata, 6, 1)
xdata = np.delete(xdata, 8, 1)
# extracting features from date
month = slicer(xdata[:, 1], 5,7)
t = one_hot_encoding(month[:,0].astype('int'), 13)[:, 2:]
xdata = np.concatenate((xdata, t), axis=1)
date = slicer(xdata[:, 1], 8, 10)
week = np.ceil(date.astype('int') / 7) # week of month
t = one_hot_encoding(week[:,0].astype('int'), 6)[:, 2:]
xdata = np.concatenate((xdata, t), axis=1)
xdata = xdata[:,2:] # dropping first 2 unnecessary columns
print(xdata[0:5])
xdata = xdata.astype('float32')
bias = np.ones((np.shape(xdata)[0],1))
xdata = np.concatenate((bias,xdata),axis=1)
if ydata is None:
return xdata
ydata = ydata.astype('float32')
return xdata,ydata
raise NotImplementedError
xdata = xdata[:,[2,3,4,7,9]]
xdata = xdata.astype('float32')
bias = np.ones((np.shape(xdata)[0],1))
xdata = np.concatenate((bias,xdata),axis=1)
if ydata is None:
return xdata
ydata = ydata.astype('float32')
return xdata,ydata
raise NotImplementedError
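# Illustration, not part of the commit, of the date-feature slicing in
# preprocess_dataset above, assuming column 1 holds a 'YYYY-MM-DD ...'-style
# timestamp string:
# sample = np.array(['2011-03-15 00:00:00'])
# slicer(sample, 5, 7)     # -> [['03']]  month digits
# slicer(sample, 8, 10)    # -> [['15']]  day of month; np.ceil(15 / 7) = 3.0 -> 3rd week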
dictionary_of_losses = {
'mse':(mean_squared_loss, mean_squared_gradient),
@@ -237,51 +158,26 @@ dictionary_of_losses = {
'logcosh':(mean_log_cosh_loss, mean_log_cosh_gradient),
}
"""
# to remove outliers from the wind speed column, uncomment this block
def out(x, std, mean):
if ((x < mean + 2 * std)and (x > mean - 2 * std)):
return 0
else:
return 1
def outlier(xtrain, ytrain, std, mean):
a =[]
for i in xtrain[:, 11].astype('float32'):
a.append(out(i,std, mean))
a = np.array(a)
xdata = np.concatenate((xtrain, a.reshape(-1, 1)), axis=1)
ytrain = np.delete(ytrain, np.argwhere(xdata[:, -1].astype('int') > 0), 0)
xdata = np.delete(xdata, np.argwhere(xdata[:, -1].astype('int') > 0), 0)
xdata = np.delete(xdata, -1, 1)
return (xdata, ytrain)"""
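# Vectorized sketch, not part of the commit, of the disabled 2-sigma filter
# above: keep only the rows whose wind-speed column lies within mean +/- 2*std.
# def drop_wind_speed_outliers(xtrain, ytrain, col=11):
#     speeds = xtrain[:, col].astype('float32')
#     keep = np.abs(speeds - np.mean(speeds)) < 2 * np.std(speeds)
#     return xtrain[keep], ytrain[keep]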
def main():
# You are free to modify the main function as per your requirements.
# Uncomment the below lines and pass the appropriate value
xtrain, ytrain, xtest = read_dataset(args.train_file, args.test_file)
"""
# to remove outliers from the wind speed column, uncomment this block
std = np.std(xtrain[:, 11].astype('float32'))
mean = np.mean(xtrain[:, 11].astype('float32'))
xtrain, ytrain =outlier(xtrain, ytrain, std, mean)"""
xtrainprocessed, ytrainprocessed = preprocess_dataset(xtrain, ytrain)
xtestprocessed = preprocess_dataset(xtest)
model = LinearRegressor(np.shape(xtrainprocessed)[1])
# The loss function is provided by command line argument
loss_fn, loss_grad = dictionary_of_losses[args.loss]
errlog = model.train(xtrainprocessed, ytrainprocessed, loss_fn, loss_grad, args.epoch, args.lr)
ytest = model.predict(xtestprocessed)
ytest = ytest.astype('int')
output = [(i,np.absolute(ytest[i])) for i in range(len(ytest))]
np.savetxt("output.csv",output,delimiter=',',fmt="%d",header="instance (id),count",comments='')
np.savetxt("error.log",errlog,delimiter='\n',fmt="%f")
if __name__ == '__main__':
@@ -296,4 +192,4 @@ if __name__ == '__main__':
args = parser.parse_args()
main()
\ No newline at end of file
main()
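# The argument parsing block is collapsed in this diff; a minimal sketch that
# would supply the attributes main() reads (the flag names and defaults here
# are assumptions, not the committed code):
# parser = argparse.ArgumentParser()
# parser.add_argument('--loss', default='mse', choices=list(dictionary_of_losses))
# parser.add_argument('--lr', type=float, default=0.01)
# parser.add_argument('--epoch', type=int, default=100)
# parser.add_argument('--train_file', type=str)
# parser.add_argument('--test_file', type=str)
# args = parser.parse_args()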