Commit 1b4341db authored by SHREYANSH JAIN

mse final leaderboard

parent 13f77136
This diff is collapsed.
......@@ -91,10 +91,11 @@ def root_mean_squared_gradient(xdata, ydata, weights):
class LinearRegressor:
# Constructor: stores the weight dimension and initialises the weight vector.
# NOTE(review): two `def __init__` headers and two `self.W` assignments appear
# back to back here; this looks like the previous and the updated lines of the
# change shown together. If both assignments run in sequence, the random
# initialisation overwrites the zero initialisation - confirm which is intended.
def __init__(self,dims):
def __init__(self, dims):
# Number of weights the model holds.
self.dims = dims
# All-zero starting weights.
self.W = np.zeros(dims)
# Random starting weights, one value per dimension.
self.W = np.random.rand(dims)
#self.W = np.random.uniform(low=0.0, high=1.0, size=dims)
return
# NOTE(review): placed after `return`, so presumably unreachable - confirm.
raise NotImplementedError
......@@ -140,16 +141,89 @@ def read_dataset(trainfile, testfile):
return np.array(xtrain), np.array(ytrain), np.array(xtest)
def one_hot_encoding(value_list, classes):
    """Return a one-hot encoding of integer class labels.

    Args:
        value_list: Array-like of integer labels, each a valid row index
            into a ``classes`` x ``classes`` identity matrix. Any shape
            is accepted.
        classes: Number of distinct classes, i.e. the length of each
            one-hot vector.

    Returns:
        A float array of shape ``value_list.shape + (classes,)`` in which
        the entry selected by each label is 1.0 and all others are 0.0.
    """
    # Accept plain lists as well as arrays; ndarrays pass through unchanged.
    labels = np.asarray(value_list)
    # Row k of the identity matrix is the one-hot vector for label k, so
    # indexing with the flattened labels yields one row per label.
    encoded = np.eye(classes)[labels.reshape(-1)]
    # Restore the caller's shape, with the class axis appended last.
    return encoded.reshape(list(labels.shape) + [classes])
# Cache for normalization statistics; the disabled `normalize` helper fills
# it on first use so the same values can be reused on later calls.
norm_dict = dict()

# Column index -> number of classes handed to one_hot_encoding for that column.
dictionary_of_classes_for_features = {2: 5, 3: 25, 5: 8, 7: 5}

# Weekday name -> 1-based day number (Monday = 1 ... Sunday = 7).
dictionary_of_days = {
    day: number
    for number, day in enumerate(
        (
            'Monday',
            'Tuesday',
            'Wednesday',
            'Thursday',
            'Friday',
            'Saturday',
            'Sunday',
        ),
        start=1,
    )
}
def slicer(arr, beg, end):
    """Slice every element of ``arr`` and return the pieces as one column.

    Args:
        arr: Iterable of sliceable items (for example strings).
        beg: Start index of the slice taken from each item.
        end: Stop index (exclusive) of the slice taken from each item.

    Returns:
        A NumPy array built from ``item[beg:end]`` for each item and
        reshaped to a single column, i.e. shape ``(-1, 1)``.
    """
    pieces = [item[beg:end] for item in arr]
    return np.array(pieces).reshape(-1, 1)
"""
#for normalization of parameters 'wind speed' and 'humidity' uncomment
def normalize(arr):
arr = arr
if not norm_dict: # make dictionary once at training to be used later during test
# for i in range(arr.shape[1]):
norm_dict['init'] = [np.min(arr), np.max(arr)]
#norm_dict['init'] = [np.mean(arr), np.std(arr)]
# for i in range(arr.shape[1]):
arr = np.array([(x - norm_dict['init'][0])/(norm_dict['init'][1] - norm_dict['init'][0]) for x in arr]) # min-max
#arr = np.array([(x - norm_dict['init'][0])/(norm_dict['init'][1]) for x in arr]) # standardization
return arr
"""
# Turn raw rows into the numeric feature matrix used by the model.
# Returns `xdata` alone when `ydata` is None, otherwise `(xdata, ydata)`;
# both are cast to float32 and `xdata` gets a leading column of ones (bias).
# NOTE(review): the statements from `xdata = xdata[:,[2,3,4,7,9]]` down to the
# first `raise NotImplementedError` repeat the tail of this function and look
# like the previous implementation shown next to the new one. Read top to
# bottom, the first `return` would make everything after it unreachable -
# confirm which body is intended.
def preprocess_dataset(xdata, ydata=None):
# Keep only columns 2, 3, 4, 7 and 9, then cast to float32.
xdata = xdata[:,[2,3,4,7,9]]
xdata = xdata.astype('float32')
# Prepend a column of ones so the first weight acts as the bias term.
bias = np.ones((np.shape(xdata)[0],1))
xdata = np.concatenate((bias,xdata),axis=1)
if ydata is None:
return xdata
ydata = ydata.astype('float32')
return xdata,ydata
raise NotImplementedError
# The string literal below is a disabled normalization snippet.
# NOTE(review): its last line assigns column 11 from column 10 again -
# probably meant to read column 11; confirm before re-enabling.
"""
#for normalization of parametes 'wind speed' and 'humidity' uncoment
xdata[:, 10] = normalize(xdata[:, 10].astype('float'))# normalized
xdata[:, 11] = normalize(xdata[:, 10].astype('float'))"""
# Convert weekday names in column 5 to numbers so they can be one-hot encoded.
xdata[:, 5] = [dictionary_of_days[i] for i in xdata[:, 5]]
# Columns treated as categorical; their class counts come from
# dictionary_of_classes_for_features.
cat_cols = [2, 3, 5, 7]
for i in cat_cols:
# dropping 2 columns for C-1 encoding and removing additional 0 column
t = one_hot_encoding(xdata[:, i].astype('int'), dictionary_of_classes_for_features[i])[:, 2:]
# Append the encoded columns on the right.
# NOTE(review): presumably inside the loop body - indentation is lost here.
xdata = np.concatenate((xdata, t),axis=1)
xdata = np.delete(xdata, cat_cols, 1) # removing useless columns
# Two further columns are dropped by position; index 8 refers to the array
# after column 6 has already been removed.
xdata = np.delete(xdata, 6, 1)
xdata = np.delete(xdata, 8, 1)
# extracting features from date
# Characters 5:7 of column 1 are used as the month - presumably column 1 holds
# a 'YYYY-MM-DD' style string; verify against the dataset.
month = slicer(xdata[:, 1], 5,7)
# 13 classes minus the first two columns leaves 11 month indicator columns.
t = one_hot_encoding(month[:,0].astype('int'), 13)[:, 2:]
xdata = np.concatenate((xdata, t), axis=1)
# Characters 8:10 of column 1 are used as the day of the month (same assumption).
date = slicer(xdata[:, 1], 8, 10)
week = np.ceil(date.astype('int') / 7) # week of month
# 6 classes minus the first two columns leaves 4 week indicator columns.
t = one_hot_encoding(week[:,0].astype('int'), 6)[:, 2:]
xdata = np.concatenate((xdata, t), axis=1)
xdata = xdata[:,2:] # dropping first 2 unnecessary columns
xdata = xdata.astype('float32')
# Prepend a column of ones so the first weight acts as the bias term.
bias = np.ones((np.shape(xdata)[0],1))
xdata = np.concatenate((bias,xdata),axis=1)
# Test data has no targets: return the features only.
if ydata is None:
return xdata
ydata = ydata.astype('float32')
return xdata,ydata
# NOTE(review): follows the returns above, so presumably unreachable - confirm.
raise NotImplementedError
dictionary_of_losses = {
'mse':(mean_squared_loss, mean_squared_gradient),
......@@ -158,26 +232,51 @@ dictionary_of_losses = {
'logcosh':(mean_log_cosh_loss, mean_log_cosh_gradient),
}
# Entry point: read the data, preprocess it, train a LinearRegressor with the
# loss chosen on the command line, then write predictions and the error log.
# NOTE(review): `def main():` and most statements below appear twice; this
# looks like the previous and the updated lines of the change shown together.
# Confirm which copy is intended before running.
def main():
# The string literal below holds disabled outlier-removal helpers: `out` flags
# a value lying outside mean +/- 2*std, and `outlier` drops the flagged rows
# (judged on column 11) from both xtrain and ytrain.
"""
#For outliers removal from wind speed column uncomment
def out(x, std, mean):
if ((x < mean + 2 * std)and (x > mean - 2 * std)):
return 0
else:
return 1
def outlier(xtrain, ytrain, std, mean):
a =[]
for i in xtrain[:, 11].astype('float32'):
a.append(out(i,std, mean))
a = np.array(a)
xdata = np.concatenate((xtrain, a.reshape(-1, 1)), axis=1)
ytrain = np.delete(ytrain, np.argwhere(xdata[:, -1].astype('int') > 0), 0)
xdata = np.delete(xdata, np.argwhere(xdata[:, -1].astype('int') > 0), 0)
xdata = np.delete(xdata, -1, 1)
return (xdata, ytrain)"""
# You are free to modify the main function as per your requirements.
def main():
# You are free to modify the main function as per your requirements.
# Uncomment the below lines and pass the appropriate value
# Load raw train/test data from the paths given in the module-level `args`.
xtrain, ytrain, xtest = read_dataset(args.train_file, args.test_file)
xtrainprocessed, ytrainprocessed = preprocess_dataset(xtrain, ytrain)
xtestprocessed = preprocess_dataset(xtest)
xtrain, ytrain, xtest = read_dataset(args.train_file, args.test_file)
# The string literal below is the disabled call site for the outlier helpers:
# it computes std/mean of column 11 and filters the training set with them.
"""
#For outliers removal from wind speed column uncomment
std = np.std(xtrain[:, 11].astype('float32'))
mean = np.mean(xtrain[:, 11].astype('float32'))
xtrain, ytrain =outlier(xtrain, ytrain, std, mean)"""
# Build feature matrices; the test set has no targets, so only xdata comes back.
xtrainprocessed, ytrainprocessed = preprocess_dataset(xtrain, ytrain)
xtestprocessed = preprocess_dataset(xtest)
# One weight per column of the processed training matrix.
model = LinearRegressor(np.shape(xtrainprocessed)[1])
model = LinearRegressor(np.shape(xtrainprocessed)[1])
# The loss function is provided by command line argument
loss_fn, loss_grad = dictionary_of_losses[args.loss]
# The loss function is provided by command line argument
loss_fn, loss_grad = dictionary_of_losses[args.loss]
# Train and keep the returned error log, then predict on the test features.
errlog = model.train(xtrainprocessed, ytrainprocessed, loss_fn, loss_grad, args.epoch, args.lr)
ytest = model.predict(xtestprocessed)
# Predictions are cast to int and written as absolute values.
ytest = ytest.astype('int')
output = [(i,np.absolute(ytest[i])) for i in range(len(ytest))]
# output.csv: one "index,count" row per test instance, with a header line.
np.savetxt("output.csv",output,delimiter=',',fmt="%d",header="instance (id),count",comments='')
# error.log: one error-log value per line.
np.savetxt("error.log",errlog,delimiter='\n',fmt="%f")
errlog = model.train(xtrainprocessed, ytrainprocessed, loss_fn, loss_grad, args.epoch, args.lr)
ytest = model.predict(xtestprocessed)
ytest = ytest.astype('int')
output = [(i,np.absolute(ytest[i])) for i in range(len(ytest))]
np.savetxt("output.csv",output,delimiter=',',fmt="%d",header="instance (id),count",comments='')
np.savetxt("error.log",errlog,delimiter='\n',fmt="%f")
if __name__ == '__main__':
......@@ -192,4 +291,4 @@ if __name__ == '__main__':
args = parser.parse_args()
main()
main()
\ No newline at end of file
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment