Sanjna / inLab / Commits / dec41b34

Commit dec41b34 authored Oct 02, 2020 by Sanjna

ML Assignment

Showing 1 changed file with 372 additions and 0 deletions

LR.py  0 → 100644  +372  -0
import pandas as pd
import numpy as np
import matplotlib
matplotlib.use("Agg")
from matplotlib import pyplot as plt
from numpy.linalg import norm

np.random.seed(42)

'''
References:
https://www.cs.toronto.edu/~frossard/post/linear_regression/
'''
class Scaler():
    # hint: https://machinelearningmastery.com/standardscaler-and-minmaxscaler-transforms-in-python/
    # def __init__(self):
    #     raise NotImplementedError
    # def __call__(self, features, is_train=False):
    #     raise NotImplementedError
    pass
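# A minimal min-max scaler along the lines of the hint above. This is an
# illustrative sketch, not part of the submitted assignment: the class name,
# attributes, and [0, 1] range are all assumptions.
class MinMaxScaler:
    def __init__(self):
        self.min = None
        self.max = None

    def __call__(self, features, is_train=False):
        if is_train:
            # fit the per-column range on training data only
            self.min = features.min(axis=0)
            self.max = features.max(axis=0)
        # scale each column into [0, 1], guarding against constant columns
        span = np.where(self.max > self.min, self.max - self.min, 1.0)
        return (features - self.min) / span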
def get_features(csv_path, is_train=False, scaler=None):
    '''
    Description:
    read input feature columns from csv file
    manipulate feature columns, create basis functions, do feature scaling etc.
    return a feature matrix (numpy array) of shape m x n
    m is number of examples, n is number of features
    return value: numpy array

    Arguments:
    csv_path: path to csv file
    is_train: True if using training data (optional)
    scaler: a class object for doing feature scaling (optional)

    help:
    useful links:
    * https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html
    * https://www.geeksforgeeks.org/python-read-csv-using-pandas-read_csv/
    * https://realpython.com/python-csv/#writing-csv-files-with-pandas
    '''
    data = pd.read_csv(csv_path)
    # drop the target column; note its name carries a leading space in the csv header
    features = data.drop(" shares", axis=1)
    return np.array(features, dtype='float32')
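# The docstring above mentions basis functions and feature scaling, which the
# body does not yet do. A hedged sketch of both, assuming a Scaler-like
# callable such as the MinMaxScaler sketch earlier (the function name is
# hypothetical):
def get_features_scaled(csv_path, is_train=False, scaler=None):
    data = pd.read_csv(csv_path)
    X = np.array(data.drop(" shares", axis=1), dtype='float32')
    if scaler is not None:
        X = scaler(X, is_train=is_train)
    # prepend a column of ones so the first weight acts as an intercept term
    return np.hstack([np.ones((X.shape[0], 1), dtype='float32'), X])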
def get_targets(csv_path):
    '''
    Description:
    read target outputs from the csv file
    return a numpy array of shape m x 1
    m is number of examples
    '''
    data = pd.read_csv(csv_path)
    targets = np.array(data[" shares"], dtype='float32')
    # reshape to m x 1, as promised above, so that residuals computed against
    # n x 1 weight vectors do not broadcast to m x m
    return targets.reshape(-1, 1)
def analytical_solution(feature_matrix, targets, C=0.0):
    '''
    Description:
    implement analytical solution to obtain weights
    as described in lecture 5d or 4b
    return value: numpy array

    Arguments:
    feature_matrix: numpy array of shape m x n
    targets: numpy array of shape m x 1
    '''
    # regularized normal equations: w = ((1/m) X^T X + C I)^(-1) (1/m) X^T y
    m, n = feature_matrix.shape
    gram = (feature_matrix.T @ feature_matrix) / m + C * np.identity(n)
    return np.linalg.inv(gram) @ feature_matrix.T @ targets / m
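# The same regularized normal equations, w = ((1/m) X^T X + C I)^(-1) (1/m) X^T y,
# can be solved without forming an explicit inverse; np.linalg.solve is generally
# more numerically stable. A sketch with a hypothetical name, equivalent in exact
# arithmetic to analytical_solution above:
def analytical_solution_solve(feature_matrix, targets, C=0.0):
    m, n = feature_matrix.shape
    A = (feature_matrix.T @ feature_matrix) / m + C * np.identity(n)
    b = (feature_matrix.T @ targets) / m
    return np.linalg.solve(A, b)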
def get_predictions(feature_matrix, weights):
    '''
    Description:
    return predictions given feature matrix and weights
    return value: numpy array

    Arguments:
    feature_matrix: numpy array of shape m x n
    weights: numpy array of shape n x 1
    '''
    return feature_matrix @ weights
def mse_loss(feature_matrix, weights, targets):
    '''
    Description:
    Implement mean squared error loss function
    return value: float (scalar)

    Arguments:
    feature_matrix: numpy array of shape m x n
    weights: numpy array of shape n x 1
    targets: numpy array of shape m x 1
    '''
    # mse = ||Xw - y||^2 / m
    m = len(feature_matrix)
    residuals = feature_matrix @ weights - targets
    return norm(residuals) ** 2 / m
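# Since targets is m x 1, the squared norm of the residual divided by m equals
# the mean of the squared residuals, so mse_loss can be sanity-checked against
# this one-liner (hypothetical helper, illustration only):
def mse_loss_check(feature_matrix, weights, targets):
    return float(np.mean((feature_matrix @ weights - targets) ** 2))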
def l2_regularizer(weights):
    '''
    Description:
    Implement l2 regularizer
    return value: float (scalar)

    Arguments:
    weights: numpy array of shape n x 1
    '''
    # squared l2 norm of the weights: ||w||^2
    return norm(weights) ** 2
def loss_fn(feature_matrix, weights, targets, C=0.0):
    '''
    Description:
    compute the loss function: mse_loss + C * l2_regularizer

    Arguments:
    feature_matrix: numpy array of shape m x n
    weights: numpy array of shape n x 1
    targets: numpy array of shape m x 1
    C: weight for regularization penalty
    return value: float (scalar)
    '''
    return mse_loss(feature_matrix, weights, targets) + C * l2_regularizer(weights)
def compute_gradients(feature_matrix, weights, targets, C=0.0):
    '''
    Description:
    compute gradient of weights w.r.t. the loss_fn function implemented above

    Arguments:
    feature_matrix: numpy array of shape m x n
    weights: numpy array of shape n x 1
    targets: numpy array of shape m x 1
    C: weight for regularization penalty
    return value: numpy array
    '''
    m = len(feature_matrix)
    # gradient of the mse term: (2/m) X^T (Xw - y)
    mse_grad = feature_matrix.T @ (2 * (feature_matrix @ weights - targets) / m)
    # gradient of the penalty C ||w||^2 is 2 C w
    return mse_grad + 2 * C * weights
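# A central-difference numerical gradient of loss_fn, useful for validating
# compute_gradients on a small random problem. The helper name and epsilon are
# assumptions made for illustration:
def numerical_gradient(feature_matrix, weights, targets, C=0.0, eps=1e-5):
    grad = np.zeros_like(weights, dtype='float64')
    for i in range(weights.shape[0]):
        w_plus = weights.astype('float64')
        w_minus = weights.astype('float64')
        w_plus[i] += eps
        w_minus[i] -= eps
        grad[i] = (loss_fn(feature_matrix, w_plus, targets, C)
                   - loss_fn(feature_matrix, w_minus, targets, C)) / (2 * eps)
    return grad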
def sample_random_batch(feature_matrix, targets, batch_size):
    '''
    Description:
    Batching -- Randomly sample batch_size number of elements from feature_matrix and targets
    return a tuple: (sampled_feature_matrix, sampled_targets)
    sampled_feature_matrix: numpy array of shape batch_size x n
    sampled_targets: numpy array of shape batch_size x 1

    Arguments:
    feature_matrix: numpy array of shape m x n
    targets: numpy array of shape m x 1
    batch_size: int

    References:
    https://numpy.org/doc/stable/reference/random/generated/numpy.random.randint.html
    https://machinelearningmastery.com/gentle-introduction-mini-batch-gradient-descent-configure-batch-size/
    https://www.geeksforgeeks.org/ml-mini-batch-gradient-descent-with-python/?ref=rp
    '''
    m = len(targets)
    # draw batch_size row indices uniformly at random (with replacement)
    idx = np.random.randint(0, m, batch_size)
    sampled_feature_matrix = feature_matrix[idx]
    sampled_targets = targets[idx].reshape(batch_size, 1)
    return sampled_feature_matrix, sampled_targets
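# np.random.randint draws indices with replacement, so a batch can repeat rows.
# If distinct rows per batch are preferred, np.random.choice with replace=False
# is an alternative (hypothetical helper, not required by the assignment):
def sample_random_batch_no_replacement(feature_matrix, targets, batch_size):
    idx = np.random.choice(len(targets), size=batch_size, replace=False)
    return feature_matrix[idx], targets[idx].reshape(batch_size, 1)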
def initialize_weights(n):
    '''
    Description:
    initialize weights to some initial values
    return value: numpy array of shape n x 1

    Arguments:
    n: int
    '''
    # start from all zeros (as floats, since the weights are updated with
    # floating-point gradients)
    return np.zeros((n, 1), dtype='float32')
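# Zero initialization is safe here because the (regularized) least-squares
# objective is convex. A small random initialization is a common alternative;
# a sketch with an assumed scale:
def initialize_weights_random(n, scale=0.01):
    return (scale * np.random.randn(n, 1)).astype('float32')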
def update_weights(weights, gradients, lr):
    '''
    Description:
    update weights using gradient descent
    return value: numpy matrix of shape nx1

    Arguments:
    weights: numpy matrix of shape nx1
    gradients: numpy matrix of shape nx1
    lr: learning rate
    '''
    # one gradient descent step: w <- w - lr * grad
    return weights - lr * gradients
def early_stopping(arg_1=None, arg_2=None, arg_3=1e+180):
    # allowed to modify argument list as per your need
    # return True or False
    '''
    References:
    https://www.google.com/search?rlz=1C5CHFA_enIN824IN824&sxsrf=ALeKk015bshZTtRzJR47BxJ0DJCNs1A50Q%3A1601228315233&ei=G85wX93oDZPC3LUPiMCREA&q=early+stopping+using+python+numpy+for+linear+regression&oq=early+stopping+using+python+numpy+for+linear+regression&gs_lcp=CgZwc3ktYWIQAzoECAAQRzoFCCEQoAE6BwghEAoQoAE6BAghEBVQ9q0BWIDWAWD71wFoAHABeACAAdQBiAHfFZIBBjAuMjEuMZgBAKABAaoBB2d3cy13aXrIAQjAAQE&sclient=psy-ab&ved=0ahUKEwid4run8InsAhUTIbcAHQhgBAIQ4dUDCA0&uact=5
    '''
    # stop once two consecutive dev losses differ by no more than arg_3
    return abs(arg_1 - arg_2) <= arg_3
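# A common alternative is patience-based early stopping: stop only after the
# dev loss has failed to improve for several consecutive evaluations. A sketch;
# the class name and default patience are assumptions:
class PatienceStopper:
    def __init__(self, patience=10):
        self.patience = patience
        self.best = float('inf')
        self.bad_evals = 0

    def should_stop(self, dev_loss):
        if dev_loss < self.best:
            self.best = dev_loss
            self.bad_evals = 0
        else:
            self.bad_evals += 1
        return self.bad_evals >= self.patience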
def do_gradient_descent(train_feature_matrix, train_targets, dev_feature_matrix,
                        dev_targets, lr=1.0, C=0.0, batch_size=32, max_steps=10000,
                        eval_steps=5):
    '''
    feel free to significantly modify the body of this function as per your needs.
    ** However **, you ought to make use of compute_gradients and update_weights function defined above
    return your best possible estimate of LR weights

    a sample code is as follows --

    References:
    https://towardsdatascience.com/implement-gradient-descent-in-python-9b93ed7108d1
    https://blog.datumbox.com/tuning-the-learning-rate-in-gradient-descent/#:~:text=In%20order%20for%20Gradient%20Descent,will%20skip%20the%20optimal%20solution.
    https://towardsdatascience.com/hyperparameter-tuning-with-python-keras-xgboost-guide-7cb3ef480f9c
    '''
    n = len(train_feature_matrix[0])
    weights = initialize_weights(n)
    dev_loss = mse_loss(dev_feature_matrix, weights, dev_targets)
    train_loss = mse_loss(train_feature_matrix, weights, train_targets)
    print("step {} \t dev loss: {} \t train loss: {}".format(0, dev_loss, train_loss))

    for step in range(1, max_steps + 1):
        # sample a batch of features and targets
        features, targets = sample_random_batch(train_feature_matrix, train_targets, batch_size)

        # compute gradients
        gradients = compute_gradients(features, weights, targets, C)

        # update weights, remembering the previous ones so a bad step can be rolled back
        prev_weights = weights
        weights = update_weights(weights, gradients, lr)

        prev_dev_loss = dev_loss
        prev_train_loss = train_loss
        if step % eval_steps == 0:
            dev_loss = mse_loss(dev_feature_matrix, weights, dev_targets)
            train_loss = mse_loss(train_feature_matrix, weights, train_targets)
            print("step {} \t dev loss: {} \t train loss: {}".format(step, dev_loss, train_loss))
            # implement early stopping etc. to improve performance.
            if early_stopping(prev_dev_loss, dev_loss, 1.0e+100):
                break

        # crude learning-rate adaptation: grow lr while the dev loss improves,
        # otherwise roll the step back and shrink lr drastically
        if dev_loss < prev_dev_loss:  # or train_loss < prev_train_loss:
            lr *= 2.0
        elif dev_loss > prev_dev_loss:  # or train_loss > prev_train_loss:
            weights = prev_weights
            lr *= 3.0e-50

    return weights
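# A gentler alternative to the doubling/shrinking schedule above is a fixed
# step-decay schedule for the learning rate. A sketch; the drop factor and
# interval are assumed hyperparameters, not part of the assignment:
def step_decay(lr0, step, drop=0.5, steps_per_drop=100000):
    return lr0 * (drop ** (step // steps_per_drop))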
def do_evaluation(feature_matrix, targets, weights):
    # your predictions will be evaluated based on mean squared error
    predictions = get_predictions(feature_matrix, weights)
    loss = mse_loss(feature_matrix, weights, targets)
    return loss
if __name__ == '__main__':
    scaler = Scaler()  # use of scaler is optional
    train_features, train_targets = \
        get_features('../input/programming-assignment-1/train.csv', True, scaler), \
        get_targets('../input/programming-assignment-1/train.csv')
    dev_features, dev_targets = \
        get_features('../input/programming-assignment-1/dev.csv', False, scaler), \
        get_targets('../input/programming-assignment-1/dev.csv')

    a_solution = analytical_solution(train_features, train_targets, C=1e-8)
    print('evaluating analytical_solution...')
    dev_loss = do_evaluation(dev_features, dev_targets, a_solution)
    train_loss = do_evaluation(train_features, train_targets, a_solution)
    print('analytical_solution \t train loss: {}, dev_loss: {} '.format(train_loss, dev_loss))

    print('training LR using gradient descent...')
    gradient_descent_soln = do_gradient_descent(train_features, train_targets,
                                                dev_features, dev_targets,
                                                lr=1.0, C=0.0, batch_size=32,
                                                max_steps=2000000, eval_steps=5)

    print('evaluating iterative_solution...')
    dev_loss = do_evaluation(dev_features, dev_targets, gradient_descent_soln)
    train_loss = do_evaluation(train_features, train_targets, gradient_descent_soln)
    print('gradient_descent_soln \t train loss: {}, dev_loss: {} '.format(train_loss, dev_loss))