FML_Project_RF_Chi2 / Commits

Commit 1446cab0 authored Dec 06, 2020 by Sanchit
Add new file
parent 2790fbf1

Showing 1 changed file with 102 additions and 0 deletions

GA_LR.py  0 → 100644  (+102 -0)
'''README
Parameters used:
    number of features: first thirty
    test_size = 0.30
'''
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot  # imported but not used below
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix

# split the data into training and testing sets
nfeat = 30
# range(1, nfeat) selects columns 1..29 (29 feature columns); column 0 and the header row are skipped
X = np.loadtxt('dataset/data.csv', delimiter=',', skiprows=1, usecols=range(1, nfeat))
y = np.genfromtxt('dataset/labels.csv', dtype='str', skip_header=1, usecols=[1], delimiter=',')
df = pd.DataFrame(X, columns=np.arange(1, nfeat))
label = np.array(y)
X_train, X_test, y_train, y_test = train_test_split(df, label, test_size=0.30, random_state=101)

# train a logistic regression model on all loaded features as a baseline
logmodel = LogisticRegression()
logmodel.fit(X_train, y_train)
predictions = logmodel.predict(X_test)
print("Accuracy = " + str(accuracy_score(y_test, predictions)))

# defining the various steps required for the genetic algorithm
def initilization_of_population(size, n_feat):
    population = []
    for i in range(size):
        # start each chromosome with roughly 5% of the features enabled, in random positions
        chromosome = np.ones(n_feat, dtype=bool)  # plain bool: np.bool is deprecated in recent NumPy
        # chromosome[:int(0.3*n_feat)] = False    # alternative: disable only 30% of the genes
        chromosome[:int(0.95 * n_feat)] = False   # use the n_feat argument rather than the global nfeat
        np.random.shuffle(chromosome)
        population.append(chromosome)
    return population

def fitness_score(population):
    # score each chromosome by the test accuracy of a logistic regression fitted on
    # the selected columns, then return scores and population sorted best-first
    scores = []
    for chromosome in population:
        if sum(chromosome) == 0:
            chromosome[0] = True  # make sure at least one feature is selected
        logmodel.fit(X_train.iloc[:, chromosome], y_train)
        predictions = logmodel.predict(X_test.iloc[:, chromosome])
        scores.append(accuracy_score(y_test, predictions))
    scores, population = np.array(scores), np.array(population)
    inds = np.argsort(scores)
    return list(scores[inds][::-1]), list(population[inds, :][::-1])

def selection(pop_after_fit, n_parents):
    population_nextgen = []
    for i in range(n_parents):
        population_nextgen.append(pop_after_fit[i])
    return population_nextgen

def crossover(pop_after_sel):
    # single-segment crossover: each child copies genes 3:7 from the next parent
    population_nextgen = list(pop_after_sel)
    for i in range(len(pop_after_sel)):
        child = pop_after_sel[i].copy()  # copy so the parent chromosome is not modified in place
        child[3:7] = pop_after_sel[(i + 1) % len(pop_after_sel)][3:7]
        population_nextgen.append(child)
    return population_nextgen

def mutation(pop_after_cross, mutation_rate):
    population_nextgen = []
    for i in range(len(pop_after_cross)):
        chromosome = pop_after_cross[i].copy()  # copy so chromosomes kept from earlier steps are not flipped in place
        for j in range(len(chromosome)):
            if random.random() < mutation_rate:
                chromosome[j] = not chromosome[j]
        population_nextgen.append(chromosome)
    return population_nextgen

def generations(size, n_feat, n_parents, mutation_rate, n_gen, X_train, X_test, y_train, y_test):
    best_chromo = []
    best_score = []
    population_nextgen = initilization_of_population(size, n_feat)
    for i in range(n_gen):
        scores, pop_after_fit = fitness_score(population_nextgen)
        print(scores[:2])
        pop_after_sel = selection(pop_after_fit, n_parents)
        pop_after_cross = crossover(pop_after_sel)
        population_nextgen = mutation(pop_after_cross, mutation_rate)
        best_chromo.append(pop_after_fit[0])
        best_score.append(scores[0])
    return best_chromo, best_score

# run the GA and evaluate logistic regression on the best feature subset of the final generation
chromo, score = generations(size=200, n_feat=nfeat - 1, n_parents=100, mutation_rate=0.010, n_gen=38,
                            X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test)
logmodel.fit(X_train.iloc[:, chromo[-1]], y_train)
predictions = logmodel.predict(X_test.iloc[:, chromo[-1]])
print("Accuracy score after the genetic algorithm = " + str(accuracy_score(y_test, predictions)))
cm = confusion_matrix(y_test, predictions)
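
As a follow-up, here is a minimal sketch (not part of the committed GA_LR.py) of how the result could be inspected once the script has run. It assumes the objects defined above (np, df, chromo, cm) are still in scope, prints the confusion matrix that the script computes but never displays, and maps the best chromosome of the final generation back to the column numbers of df.

# Sketch: inspect the GA result, assuming GA_LR.py has just been executed in the same session.
best_mask = np.array(chromo[-1], dtype=bool)    # boolean mask over the 29 loaded feature columns
selected_columns = list(df.columns[best_mask])  # column numbers (1..29) retained by the GA
print("Confusion matrix:\n", cm)
print("Selected feature columns:", selected_columns)
print("Number of selected features:", int(best_mask.sum()))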