SHREYANSH JAIN / ML725 / Commits
Commit eed7eff9 authored Sep 11, 2019 by SHREYANSH JAIN
Revert "Merge branch 'test' into 'master'"

This reverts merge request !1

parent 6c9f1173
Changes: 3 changed files with 11534 additions and 11638 deletions (+11534, -11638)

Assignment1/error.log    +8000  -8000
Assignment1/main.py      +33    -137
Assignment1/output.csv   +3501  -3501
Assignment1/error.log @ eed7eff9 (large diff collapsed)
Assignment1/main.py @ eed7eff9
import numpy as np
import argparse
import csv
# import matplotlib.pyplot as plt

'''
You are only required to fill the following functions
...
...
@@ -42,15 +41,17 @@ def mean_absolute_loss(xdata, ydata, weights):
    guess = np.dot(xdata, weights)
    samples = np.shape(guess)[0]
    err = 0.5 * samples * np.sum(np.absolute(ydata - guess))
    err = (1 / samples) * np.sum(np.absolute(ydata - guess))
    return err
    raise NotImplementedError

def mean_absolute_gradient(xdata, ydata, weights):
    samples = np.shape(xdata)[0]
    guess = np.dot(xdata, weights)
    gradient = (1 / samples) * np.dot(xdata.T, (guess - ydata))
    if np.sum(ydata - guess) < 0:
        gradient = np.random.randint(0, 10, np.shape(weights)[0])
    else:
        gradient = np.random.randint(-10, 0, np.shape(weights)[0])
    return gradient
    raise NotImplementedError
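Note that the two err lines above are the removed/added pair of this diff (one scales by 0.5 * samples, the other by 1 / samples), and the gradient that follows overwrites the analytic expression with random integers. For reference only, a minimal sketch of the standard MAE subgradient, assuming the same xdata, ydata, weights shapes used above (this is not code from the commit):

def mean_absolute_gradient_ref(xdata, ydata, weights):
    # Subgradient of (1/n) * sum(|y - Xw|) with respect to w is
    # -(1/n) * X^T sign(y - Xw), taking sign(0) = 0.
    guess = np.dot(xdata, weights)
    samples = np.shape(xdata)[0]
    return -(1 / samples) * np.dot(xdata.T, np.sign(ydata - guess))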
...
...
@@ -59,17 +60,15 @@ def mean_log_cosh_loss(xdata, ydata, weights):
    guess = np.dot(xdata, weights)
    samples = np.shape(guess)[0]
    err = samples * np.sum(np.log(np.cosh(ydata - guess)))
    err = (1 / samples) * np.sum(np.square(ydata - guess))
    return err
    raise NotImplementedError

def mean_log_cosh_gradient(xdata, ydata, weights):
    guess = np.dot(xdata, weights)
    simplerr = np.multiply(2, ydata - guess)
    samples = np.shape(guess)[0]
    derivative = np.divide(np.exp(simplerr) - 1, np.exp(simplerr) + 1)
    gradient = (1 / samples) * np.dot(xdata.T, derivative)
    gradient = np.dot(xdata.T, np.tanh(guess - ydata))
    return gradient
    raise NotImplementedError
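As rendered, neither version of mean_log_cosh_loss computes a mean log-cosh (one scales the log-cosh sum by samples instead of 1 / samples, the other is a mean squared error), and only one of the paired gradient lines keeps the 1 / samples factor. For reference only, a consistent loss/gradient pair under the same shape assumptions (not code from the commit):

def mean_log_cosh_loss_ref(xdata, ydata, weights):
    # (1/n) * sum(log(cosh(y - Xw)))
    guess = np.dot(xdata, weights)
    samples = np.shape(xdata)[0]
    return (1 / samples) * np.sum(np.log(np.cosh(ydata - guess)))

def mean_log_cosh_gradient_ref(xdata, ydata, weights):
    # d/dw log(cosh(y - Xw)) = -tanh(y - Xw) * x, so the mean gradient is
    # (1/n) * X^T tanh(Xw - y). np.tanh is also numerically safer than the
    # (e^2z - 1) / (e^2z + 1) form above, which overflows for large residuals.
    guess = np.dot(xdata, weights)
    samples = np.shape(xdata)[0]
    return (1 / samples) * np.dot(xdata.T, np.tanh(guess - ydata))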
...
...
@@ -92,11 +91,10 @@ def root_mean_squared_gradient(xdata, ydata, weights):
class LinearRegressor:
    def __init__(self, dims):
        self.dims = dims
        self.W = np.random.rand(dims)
        #self.W = np.random.uniform(low=0.0, high=1.0, size=dims)
        self.W = np.zeros(dims)
        return
        raise NotImplementedError
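The train and predict bodies are collapsed out of this diff. Purely as a hypothetical sketch of how they could look given the call sites in main() below (model.train(x, y, loss_fn, loss_grad, epochs, lr) returning a per-epoch loss log, and model.predict(x)); the commit's actual implementation is not shown:

    def train(self, xtrain, ytrain, loss_fn, loss_grad, epochs, lr):
        errlog = []
        for _ in range(epochs):
            # one full-batch gradient-descent step on the chosen loss
            self.W = self.W - lr * loss_grad(xtrain, ytrain, self.W)
            errlog.append(loss_fn(xtrain, ytrain, self.W))
        return errlog

    def predict(self, xtest):
        return np.dot(xtest, self.W)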
...
...
@@ -142,93 +140,16 @@ def read_dataset(trainfile, testfile):
    return np.array(xtrain), np.array(ytrain), np.array(xtest)

def one_hot_encoding(value_list, classes):
    res = np.eye(classes)[value_list.reshape(-1)]
    return res.reshape(list(value_list.shape) + [classes])

norm_dict = {}
dictionary_of_classes_for_features = {2: 5, 3: 25, 5: 8, 7: 5}
dictionary_of_days = {'Monday': 1, 'Tuesday': 2, 'Wednesday': 3, 'Thursday': 4,
                      'Friday': 5, 'Saturday': 6, 'Sunday': 7}

def slicer(arr, beg, end):
    return np.array([i[beg:end] for i in arr]).reshape(-1, 1)
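The np.eye(classes) fancy-indexing in one_hot_encoding is a compact one-hot construction: row k of the identity matrix is the one-hot vector for class k. For instance:

>>> one_hot_encoding(np.array([0, 2, 1]), 3)
array([[1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.]])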
"""
#for normalization of parametes 'wind speed' and 'humidity' uncoment
def normalize(arr):
arr = arr
if not norm_dict: # make dictionary once at training to be used later during test
# for i in range(arr.shape[1]):
norm_dict['init'] = [np.min(arr), np.max(arr)]
#norm_dict['init'] = [np.mean(arr), np.std(arr)]
# for i in range(arr.shape[1]):
arr = np.array([(x - norm_dict['init'][0])/(norm_dict['init'][1] - norm_dict['init'][0]) for x in arr]) # min-max
#arr = np.array([(x - norm_dict['init'][0])/(norm_dict['init'][1]) for x in arr]) # standardization
return arr
"""
# 4 hours band
# 1/-1 encoding
# use feature selection and tuning in Jupyter then apply it back here
def preprocess_dataset(xdata, ydata=None):
    # converting weekdays to numeric for one_hot_encoding
    """
    # for normalization of parameters 'wind speed' and 'humidity', uncomment
    xdata[:, 10] = normalize(xdata[:, 10].astype('float'))  # normalized
    xdata[:, 11] = normalize(xdata[:, 10].astype('float'))"""
    xdata[:, 5] = [dictionary_of_days[i] for i in xdata[:, 5]]
    cat_cols = [2, 3, 5, 7]
    for i in cat_cols:
        # dropping 2 columns for C-1 encoding and removing additional 0 column
        t = one_hot_encoding(xdata[:, i].astype('int'), dictionary_of_classes_for_features[i])[:, 2:]
        xdata = np.concatenate((xdata, t), axis=1)
    xdata = np.delete(xdata, cat_cols, 1)
    # removing useless columns
    xdata = np.delete(xdata, 6, 1)
    xdata = np.delete(xdata, 8, 1)
    # extracting features from date
    month = slicer(xdata[:, 1], 5, 7)
    t = one_hot_encoding(month[:, 0].astype('int'), 13)[:, 2:]
    xdata = np.concatenate((xdata, t), axis=1)
    date = slicer(xdata[:, 1], 8, 10)
    week = np.ceil(date.astype('int') / 7)  # week of month
    t = one_hot_encoding(week[:, 0].astype('int'), 6)[:, 2:]
    xdata = np.concatenate((xdata, t), axis=1)
    xdata = xdata[:, 2:]  # dropping first 2 unnecessary columns
    print(xdata[0:5])
    xdata = xdata.astype('float32')
    bias = np.ones((np.shape(xdata)[0], 1))
    xdata = np.concatenate((bias, xdata), axis=1)
    if ydata is None:
        return xdata
    ydata = ydata.astype('float32')
    return xdata, ydata
    raise NotImplementedError
    xdata = xdata[:, [2, 3, 4, 7, 9]]
    xdata = xdata.astype('float32')
    bias = np.ones((np.shape(xdata)[0], 1))
    xdata = np.concatenate((bias, xdata), axis=1)
    if ydata is None:
        return xdata
    ydata = ydata.astype('float32')
    return xdata, ydata
    raise NotImplementedError
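The date features above are extracted by character position from the timestamp string in column 1. A hypothetical example, assuming entries like '2019-09-11 14:00':

row = np.array(['2019-09-11 14:00'])
month = slicer(row, 5, 7)                        # array([['09']]) -> month 9
day = slicer(row, 8, 10)                         # array([['11']]) -> day 11
week_of_month = np.ceil(day.astype('int') / 7)   # ceil(11 / 7) = 2.0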
dictionary_of_losses = {
    'mse': (mean_squared_loss, mean_squared_gradient),
...
...
@@ -237,51 +158,26 @@ dictionary_of_losses = {
    'logcosh': (mean_log_cosh_loss, mean_log_cosh_gradient),
}
"""
#For outliers removal from wind speed column uncomment
def out(x, std, mean):
if ((x < mean + 2 * std)and (x > mean - 2 * std)):
return 0
else:
return 1
def outlier(xtrain, ytrain, std, mean):
a =[]
for i in xtrain[:, 11].astype('float32'):
a.append(out(i,std, mean))
a = np.array(a)
xdata = np.concatenate((xtrain, a.reshape(-1, 1)), axis=1)
ytrain = np.delete(ytrain, np.argwhere(xdata[:, -1].astype('int') > 0), 0)
xdata = np.delete(xdata, np.argwhere(xdata[:, -1].astype('int') > 0), 0)
xdata = np.delete(xdata, -1, 1)
return (xdata, ytrain)"""
def main():
    # You are free to modify the main function as per your requirements.
    # Uncomment the below lines and pass the appropriate value
    xtrain, ytrain, xtest = read_dataset(args.train_file, args.test_file)
    """
    # For outliers removal from the wind speed column, uncomment
    std = np.std(xtrain[:, 11].astype('float32'))
    mean = np.mean(xtrain[:, 11].astype('float32'))
    xtrain, ytrain = outlier(xtrain, ytrain, std, mean)"""
    xtrainprocessed, ytrainprocessed = preprocess_dataset(xtrain, ytrain)
    xtestprocessed = preprocess_dataset(xtest)
    model = LinearRegressor(np.shape(xtrainprocessed)[1])
    # The loss function is provided by command line argument
    loss_fn, loss_grad = dictionary_of_losses[args.loss]
    errlog = model.train(xtrainprocessed, ytrainprocessed, loss_fn, loss_grad, args.epoch, args.lr)
    ytest = model.predict(xtestprocessed)
    ytest = ytest.astype('int')
    output = [(i, np.absolute(ytest[i])) for i in range(len(ytest))]
    np.savetxt("output.csv", output, delimiter=',', fmt="%d", header="instance (id),count", comments='')
    np.savetxt("error.log", errlog, delimiter='\n', fmt="%f")
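Since comments='' suppresses numpy's default '# ' prefix on the header, output.csv starts with a plain header row followed by one id,count pair per test instance; hypothetical first rows (values illustrative only):

instance (id),count
0,142
1,87
2,311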
if __name__ == '__main__':
...
...
@@ -296,4 +192,4 @@ if __name__ == '__main__':
    args = parser.parse_args()
    main()
\ No newline at end of file
    main()
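The argument parser setup is collapsed out of the diff. A sketch consistent with the attributes main() reads (flag names and defaults assumed, not from the commit):

parser = argparse.ArgumentParser()
parser.add_argument('--train_file', type=str)
parser.add_argument('--test_file', type=str)
parser.add_argument('--loss', type=str, default='mse')
parser.add_argument('--epoch', type=int)
parser.add_argument('--lr', type=float)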
Assignment1/output.csv @ eed7eff9 (large diff collapsed)