FML_Project_RF_Chi2 / Commits

Commit 1c348a48, authored Dec 06, 2020 by Sanchit
Add new file
Parent: d3c33bd9

1 changed file with 144 additions and 0 deletions:

pso_RF.py (new file, 0 → 100644, +144 −0)
import numpy as np
import pandas as pd
import pyswarms as ps
from sklearn.metrics import mean_squared_error
from sklearn import neighbors
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import multilabel_confusion_matrix, precision_recall_fscore_support, recall_score, precision_score, f1_score, confusion_matrix, accuracy_score
from sklearn import preprocessing
import seaborn as sn
import matplotlib.pyplot as plt

np.random.seed(42)

## Load data and labels ###
label = pd.read_csv('dataset/labels.csv')
data = pd.read_csv('dataset/data.csv')
#y = label.Class.values
X = data.values[:, 1:]

# Encode the class variable
encode = preprocessing.LabelEncoder()
encode.fit(label.Class.unique())
y = encode.transform(label.Class.values)

### PSO function ###
def pso_feature_selection(X, y):
    ### Change this to NN here ####
    classifier = RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=42)
    total_feat = X.shape[1]

    # Define objective function
    def f_per_particle(m):
        #total_features = total_feat
        # Get the subset of the features from the binary mask
        if np.count_nonzero(m) == 0:
            X_subset = X
        else:
            X_subset = X[:, m == 1]
        # Perform classification and store performance in P
        classifier.fit(X_subset, y)
        # Compute the objective function
        P = classifier.score(X_subset, y)
        #j = (alpha * (1.0 - P)+ (1.0 - alpha) * (1 - (X_subset.shape[1] / total_features)))
        # pyswarms minimizes the objective, so return the classification error (1 - accuracy)
        return 1.0 - P
    def f(x):
        n_particles = x.shape[0]
        j = [f_per_particle(x[i]) for i in range(n_particles)]
        return np.array(j)

    options = {'c1': 0.5, 'c2': 0.5, 'w': 0.9, 'k': 30, 'p': 2}

    # Call instance of PSO
    dimensions = total_feat  # dimensions should be the number of features
    #init = np.random.choice([0, 1], size=(10,dimensions), p=[(dimensions-50)/dimensions, 50/dimensions])
    optimizer = ps.discrete.BinaryPSO(n_particles=30, dimensions=dimensions, options=options, init_pos=None)

    # Perform optimization
    cost, pos = optimizer.optimize(f, iters=10, verbose=True)

    return pos

## Cross validation ###
KF = KFold(n_splits=5, shuffle=True)
total_train_accuracy = 0
total_test_accuracy = 0
total_precision = 0
total_recall = 0
total_fscore = 0
final_conf_mat = 0

for train_index, test_index in KF.split(X):
    # Split train-test
    x_train, x_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    #### Feature Selection
    pos = pso_feature_selection(x_test, y_test)
    print(np.count_nonzero(pos))
    x1_train = np.array(x_train[:, pos == 1])
    x1_test = np.array(x_test[:, pos == 1])

    ### Classification model (ADD NN to be fit here)###
    classifier = RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=42)
    classifier.fit(x1_train, y_train)
    y_pred = classifier.predict(x1_test)

    ##### Scoring #########
    print("Checking on Train Set")
    print("\nAccuracy on Training Set :" + str(classifier.score(x1_train, y_train)))
    total_train_accuracy += classifier.score(x1_train, y_train)

    print("Checking on Test Set")
    print("\nAccuracy on Testing Set :" + str(accuracy_score(y_test, y_pred)))
    total_test_accuracy += accuracy_score(y_test, y_pred)
    total_precision += precision_score(y_test, y_pred, average='macro')
    total_recall += recall_score(y_test, y_pred, average='macro')
    total_fscore += f1_score(y_test, y_pred, average='macro')

    print("\nPrecision Score")
    print(precision_score(y_test, y_pred, average='macro'))
    print("\nRecall Score")
    print(recall_score(y_test, y_pred, average='macro'))
    print("\nF1 Score")
    print(f1_score(y_test, y_pred, average='macro'))

    #Confusion Matrix
    conf_mat = confusion_matrix(y_test, y_pred)
    final_conf_mat += conf_mat
    print("Confusion matrix : \n")
    print(conf_mat)

print("Mean train accuracy : %.2f" % ((total_train_accuracy / 5) * 100))
print("Mean test accuracy : %.2f" % ((total_test_accuracy / 5) * 100))
print("Mean precision : %.2f" % ((total_precision / 5) * 100))
print("Mean recall : %.2f" % ((total_recall / 5) * 100))
print("Mean fscore : %.2f" % ((total_fscore / 5) * 100))
print("Confusion matrix : ")
print(final_conf_mat)

#plt.figure(figsize=(5,5))
sn.heatmap(final_conf_mat, annot=True)
plt.xlabel('Predicted')
plt.ylabel('Truth')
plt.show()
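Note on the objective in f_per_particle: the error-rate return value relies on pyswarms treating the objective as a cost to be minimized. Below is a minimal, self-contained sketch of that convention using ps.discrete.BinaryPSO; the toy cost function and the particle/dimension/option values are illustrative only and are not taken from this project.

import numpy as np
import pyswarms as ps

# Toy cost: count of selected bits per particle; minimization should drive it toward 0.
def bit_count_cost(x):
    # x has shape (n_particles, dimensions) with binary entries
    return np.sum(x, axis=1)

options = {'c1': 0.5, 'c2': 0.5, 'w': 0.9, 'k': 5, 'p': 2}
optimizer = ps.discrete.BinaryPSO(n_particles=10, dimensions=8, options=options)
best_cost, best_pos = optimizer.optimize(bit_count_cost, iters=20)
print(best_cost, best_pos)  # best_cost approaches 0 and best_pos is mostly zeros

The commented-out j expression in pso_RF.py follows the same minimization convention: it combines the error term (1 - P) with a term based on the fraction of selected features, weighted by alpha.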