Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
M
ML725
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Analytics
Analytics
Repository
Value Stream
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Commits
Open sidebar
SHREYANSH JAIN
ML725
Commits
1b4341db
Commit
1b4341db
authored
Sep 11, 2019
by
SHREYANSH JAIN
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
mse final leaderboard
parent
13f77136
Changes
3
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
11626 additions
and
11527 deletions
+11626
-11527
Assignment1/error.log
Assignment1/error.log
+8000
-8000
Assignment1/main.py
Assignment1/main.py
+125
-26
Assignment1/output.csv
Assignment1/output.csv
+3501
-3501
No files found.
Assignment1/error.log
View file @
1b4341db
This diff is collapsed.
Click to expand it.
Assignment1/main.py
View file @
1b4341db
...
...
@@ -91,10 +91,11 @@ def root_mean_squared_gradient(xdata, ydata, weights):
class
LinearRegressor
:
def
__init__
(
self
,
dims
):
def
__init__
(
self
,
dims
):
self
.
dims
=
dims
self
.
W
=
np
.
zeros
(
dims
)
self
.
W
=
np
.
random
.
rand
(
dims
)
#self.W = np.random.uniform(low=0.0, high=1.0, size=dims)
return
raise
NotImplementedError
...
...
@@ -140,16 +141,89 @@ def read_dataset(trainfile, testfile):
return
np
.
array
(
xtrain
),
np
.
array
(
ytrain
),
np
.
array
(
xtest
)
def one_hot_encoding(value_list, classes):
    """Return a one-hot encoding of integer class indices.

    Args:
        value_list: Array-like of integer class indices, of any shape.
            Plain lists and tuples are accepted as well as ndarrays.
        classes: Number of classes, i.e. the length of each one-hot row.

    Returns:
        A float ndarray of shape ``value_list.shape + (classes,)`` holding
        a single 1.0 at the position given by each index.

    Raises:
        IndexError: If an index is outside ``[-classes, classes)`` or the
            values are not integers (NumPy indexing rules).
    """
    # Coerce first so that non-ndarray inputs also expose reshape/shape.
    indices = np.asarray(value_list)
    # Row k of the identity matrix is the one-hot vector for class k.
    identity = np.eye(classes)
    flat_rows = identity[indices.reshape(-1)]
    # Restore the input's shape, with the class axis appended last.
    return flat_rows.reshape(list(indices.shape) + [classes])
# Cache for normalization statistics; in the visible code it is only
# referenced by the commented-out ``normalize`` helper.
norm_dict = dict()

# Column index -> class count handed to one_hot_encoding for that column.
dictionary_of_classes_for_features = {
    2: 5,
    3: 25,
    5: 8,
    7: 5,
}

# Weekday name -> 1-based number (Monday is 1, Sunday is 7).
dictionary_of_days = {
    day_name: day_number
    for day_number, day_name in enumerate(
        (
            'Monday',
            'Tuesday',
            'Wednesday',
            'Thursday',
            'Friday',
            'Saturday',
            'Sunday',
        ),
        start=1,
    )
}
def slicer(arr, beg, end):
    """Slice every element of ``arr`` to ``[beg:end]`` and return a column.

    Args:
        arr: Iterable of sliceable items (for example, strings).
        beg: Start index of the slice applied to each item.
        end: Stop index (exclusive) of the slice applied to each item.

    Returns:
        An ndarray built from the sliced items, reshaped to one column
        (shape ``(-1, 1)``).
    """
    pieces = []
    for item in arr:
        pieces.append(item[beg:end])
    column = np.array(pieces)
    return column.reshape(-1, 1)
"""
# To normalize the 'wind speed' and 'humidity' parameters, uncomment this block.
def normalize(arr):
arr = arr
if not norm_dict: # make dictionary once at training to be used later during test
# for i in range(arr.shape[1]):
norm_dict['init'] = [np.min(arr), np.max(arr)]
#norm_dict['init'] = [np.mean(arr), np.std(arr)]
# for i in range(arr.shape[1]):
arr = np.array([(x - norm_dict['init'][0])/(norm_dict['init'][1] - norm_dict['init'][0]) for x in arr]) # min-max
#arr = np.array([(x - norm_dict['init'][0])/(norm_dict['init'][1]) for x in arr]) # standardization
return arr
"""
def
preprocess_dataset
(
xdata
,
ydata
=
None
):
xdata
=
xdata
[:,[
2
,
3
,
4
,
7
,
9
]]
xdata
=
xdata
.
astype
(
'float32'
)
bias
=
np
.
ones
((
np
.
shape
(
xdata
)[
0
],
1
))
xdata
=
np
.
concatenate
((
bias
,
xdata
),
axis
=
1
)
if
ydata
is
None
:
return
xdata
ydata
=
ydata
.
astype
(
'float32'
)
return
xdata
,
ydata
raise
NotImplementedError
# converting weekdays to numeric for one_hot_encoding
"""
# To normalize the 'wind speed' and 'humidity' parameters, uncomment these lines.
xdata[:, 10] = normalize(xdata[:, 10].astype('float'))# normalized
xdata[:, 11] = normalize(xdata[:, 10].astype('float'))"""
xdata
[:,
5
]
=
[
dictionary_of_days
[
i
]
for
i
in
xdata
[:,
5
]]
cat_cols
=
[
2
,
3
,
5
,
7
]
for
i
in
cat_cols
:
# dropping 2 columns for C-1 encoding and removing additional 0 column
t
=
one_hot_encoding
(
xdata
[:,
i
]
.
astype
(
'int'
),
dictionary_of_classes_for_features
[
i
])[:,
2
:]
xdata
=
np
.
concatenate
((
xdata
,
t
),
axis
=
1
)
xdata
=
np
.
delete
(
xdata
,
cat_cols
,
1
)
# removing useless columns
xdata
=
np
.
delete
(
xdata
,
6
,
1
)
xdata
=
np
.
delete
(
xdata
,
8
,
1
)
# extracting features from date
month
=
slicer
(
xdata
[:,
1
],
5
,
7
)
t
=
one_hot_encoding
(
month
[:,
0
]
.
astype
(
'int'
),
13
)[:,
2
:]
xdata
=
np
.
concatenate
((
xdata
,
t
),
axis
=
1
)
date
=
slicer
(
xdata
[:,
1
],
8
,
10
)
week
=
np
.
ceil
(
date
.
astype
(
'int'
)
/
7
)
# week of month
t
=
one_hot_encoding
(
week
[:,
0
]
.
astype
(
'int'
),
6
)[:,
2
:]
xdata
=
np
.
concatenate
((
xdata
,
t
),
axis
=
1
)
xdata
=
xdata
[:,
2
:]
# dropping first 2 unnecessary columns
xdata
=
xdata
.
astype
(
'float32'
)
bias
=
np
.
ones
((
np
.
shape
(
xdata
)[
0
],
1
))
xdata
=
np
.
concatenate
((
bias
,
xdata
),
axis
=
1
)
if
ydata
is
None
:
return
xdata
ydata
=
ydata
.
astype
(
'float32'
)
return
xdata
,
ydata
raise
NotImplementedError
dictionary_of_losses
=
{
'mse'
:(
mean_squared_loss
,
mean_squared_gradient
),
...
...
@@ -158,26 +232,51 @@ dictionary_of_losses = {
'logcosh'
:(
mean_log_cosh_loss
,
mean_log_cosh_gradient
),
}
def
main
():
"""
# To remove outliers from the wind speed column, uncomment this block.
def out(x, std, mean):
if ((x < mean + 2 * std)and (x > mean - 2 * std)):
return 0
else:
return 1
def outlier(xtrain, ytrain, std, mean):
a =[]
for i in xtrain[:, 11].astype('float32'):
a.append(out(i,std, mean))
a = np.array(a)
xdata = np.concatenate((xtrain, a.reshape(-1, 1)), axis=1)
ytrain = np.delete(ytrain, np.argwhere(xdata[:, -1].astype('int') > 0), 0)
xdata = np.delete(xdata, np.argwhere(xdata[:, -1].astype('int') > 0), 0)
xdata = np.delete(xdata, -1, 1)
return (xdata, ytrain)"""
# You are free to modify the main function as per your requirements.
def
main
():
# You are free to modify the main function as per your requirements.
# Uncomment the below lines and pass the appropriate value
xtrain
,
ytrain
,
xtest
=
read_dataset
(
args
.
train_file
,
args
.
test_file
)
xtrainprocessed
,
ytrainprocessed
=
preprocess_dataset
(
xtrain
,
ytrain
)
xtestprocessed
=
preprocess_dataset
(
xtest
)
xtrain
,
ytrain
,
xtest
=
read_dataset
(
args
.
train_file
,
args
.
test_file
)
"""
# To remove outliers from the wind speed column, uncomment these lines.
std = np.std(xtrain[:, 11].astype('float32'))
mean = np.mean(xtrain[:, 11].astype('float32'))
xtrain, ytrain =outlier(xtrain, ytrain, std, mean)"""
xtrainprocessed
,
ytrainprocessed
=
preprocess_dataset
(
xtrain
,
ytrain
)
xtestprocessed
=
preprocess_dataset
(
xtest
)
model
=
LinearRegressor
(
np
.
shape
(
xtrainprocessed
)[
1
])
model
=
LinearRegressor
(
np
.
shape
(
xtrainprocessed
)[
1
])
# The loss function is provided by command line argument
loss_fn
,
loss_grad
=
dictionary_of_losses
[
args
.
loss
]
# The loss function is provided by command line argument
loss_fn
,
loss_grad
=
dictionary_of_losses
[
args
.
loss
]
errlog
=
model
.
train
(
xtrainprocessed
,
ytrainprocessed
,
loss_fn
,
loss_grad
,
args
.
epoch
,
args
.
lr
)
ytest
=
model
.
predict
(
xtestprocessed
)
ytest
=
ytest
.
astype
(
'int'
)
output
=
[(
i
,
np
.
absolute
(
ytest
[
i
]))
for
i
in
range
(
len
(
ytest
))]
np
.
savetxt
(
"output.csv"
,
output
,
delimiter
=
','
,
fmt
=
"
%
d"
,
header
=
"instance (id),count"
,
comments
=
''
)
np
.
savetxt
(
"error.log"
,
errlog
,
delimiter
=
'
\n
'
,
fmt
=
"
%
f"
)
errlog
=
model
.
train
(
xtrainprocessed
,
ytrainprocessed
,
loss_fn
,
loss_grad
,
args
.
epoch
,
args
.
lr
)
ytest
=
model
.
predict
(
xtestprocessed
)
ytest
=
ytest
.
astype
(
'int'
)
output
=
[(
i
,
np
.
absolute
(
ytest
[
i
]))
for
i
in
range
(
len
(
ytest
))]
np
.
savetxt
(
"output.csv"
,
output
,
delimiter
=
','
,
fmt
=
"
%
d"
,
header
=
"instance (id),count"
,
comments
=
''
)
np
.
savetxt
(
"error.log"
,
errlog
,
delimiter
=
'
\n
'
,
fmt
=
"
%
f"
)
if
__name__
==
'__main__'
:
...
...
@@ -192,4 +291,4 @@ if __name__ == '__main__':
args
=
parser
.
parse_args
()
main
()
main
()
\ No newline at end of file
Assignment1/output.csv
View file @
1b4341db
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment