Commit bf87ab8c authored by Sushant Mahajan

modified model and corrected loss calculation

parent c90c56f0
This diff is collapsed.
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
import sys import sys
import os import os
import csv import csv
from random import random from random import seed, random
from pprint import pprint as pp from pprint import pprint as pp
from math import log, exp from math import log, exp
import numpy as np import numpy as np
...@@ -45,7 +45,7 @@ def sigmoid(v): ...@@ -45,7 +45,7 @@ def sigmoid(v):
return 1.0/(1+exp(-v)) return 1.0/(1+exp(-v))
def sigmoidGradient(v): def sigmoidGradient(v):
return [a*b for a,b in zip([sigmoid(x) for x in v], [sigmoid(x) for x in v])] return [a*b for a,b in zip([sigmoid(x) for x in v], [1-sigmoid(x) for x in v])]
def regularization(cost, w1, w2, lamb, m): def regularization(cost, w1, w2, lamb, m):
reg = sum(w1*w1)+sum(w2*w2) reg = sum(w1*w1)+sum(w2*w2)
...@@ -104,34 +104,66 @@ def cost(li, lh, lo, weights, X, y, lamb): ...@@ -104,34 +104,66 @@ def cost(li, lh, lo, weights, X, y, lamb):
return J,grad return J,grad
def fit(X, y, li, lh, lo, weights, lamb, eta, passes=10000, verbose=True): def fit(X, y, li, lh, lo, weight, lamb, eta, passes=1000, verbose=True):
weights = np.copy(weight)
for i in range(1,passes+1): for i in range(1,passes+1):
J, dw = cost(li, lh, lo, weights, X, y, lamb) J, dw = cost(li, lh, lo, weights, X, y, lamb)
#print(weights.shape, dw.shape) #print(weights.shape, dw.shape)
weights += -eta*dw weights += -eta*dw
print(i,"\r", end='') print(i,"\r", end='')
if verbose and i%1000 == 0: if verbose and i%(passes/10) == 0:
print() print()
print(J) print(J)
return weights
def predict(x, w1, w2): def predict(x, w1, w2):
x=[1]+x #58x1 #x=[1]+x #58x1
x = np.array(x) x = np.array(x)
h1 = sigmoid(np.dot(w1,x).tolist()) #28x58 * 58x1 = 28x1 h1 = [sigmoid(z) for z in np.dot(w1,x).tolist()] #28x58 * 58x1 = 28x1
h1 = [1]+h1 h1 = [1]+h1
h2 = sigmoid(np.dot(w2,h1).tolist()) #1x29 * 29x1 = 1x1 h2 = sigmoid(np.dot(w2,h1).tolist()[0]) #1x29 * 29x1 = 1x1
return 1 if h2>0.5 else 0 return h2
def setWeightsFromFile(weights):
if os.path.isfile("weights"):
with open("weights","rb") as wfile:
weights = np.load(wfile)
return True
return False
if __name__ == "__main__": if __name__ == "__main__":
np.random.rand(47)
X,y = getData(params["train"]) X,y = getData(params["train"])
tX,ty = getData(params["test"], isTrain=False) tX,ty = getData(params["test"], isTrain=False)
# print(len(X), len(X[0]), len(y), X[0]) # print(len(X), len(X[0]), len(y), X[0])
# print(len(tX), len(ty), tX[0]) # print(len(tX), len(ty), tX[0])
li,lh,lo = tuple(params["layers"]) li,lh,lo = tuple(params["layers"])
weights = np.array([random() for _ in range(lh*(li+1)+lo*(lh+1))]) weights = np.random.rand(lh*(li+1)+lo*(lh+1))
lamb,eta = 0.1,0.1 lamb,eta = 0.1,0.1
fit(X, y, li, lh, lo, weights, lamb, eta) if not setWeightsFromFile(weights):
weights = fit(X, y, li, lh, lo, weights, lamb, eta, passes=300)
with open("weights","wb") as wfile:
np.save(wfile, weights)
w1 = weights[:(li+1)*lh].reshape(lh,li+1) #28x58
w2 = weights[(li+1)*lh:].reshape(lo,lh+1) #1x29
py = []
for x in X:
py.append(predict(x,w1,w2))
print(py)
# print("train accuracy", len(list(filter(lambda z:z==0,[a-b for a,b in zip(py,y)])))*1.0/len(y))
# pty = []
# for x in tX:
# pty.append(predict(x,w1,w2))
# with open("answer.txt","w") as dest:
# writer = csv.writer(dest)
# for i,ans in enumerate(pty):
# writer.writerow([i,ans])
# J,grad = cost(li, lh, lo, weights, X, y, 0.1) # J,grad = cost(li, lh, lo, weights, X, y, 0.1)
# print(J,grad) # print(J,grad)
#print(len(w1), len(w1[0]), len(w2), len(w2[0])) #print(len(w1), len(w1[0]), len(w2), len(w2[0]))
\ No newline at end of file
#! /usr/bin/env python3
import sys
import os
import csv
from random import seed, random
from pprint import pprint as pp
from math import log, exp
import numpy as np
def doNormalize(X):
    """Standardise every column of X to zero mean and unit standard deviation.

    X is a sequence of equal-length numeric rows. A column whose standard
    deviation is 0 (a constant column) is only mean-centred, so it comes
    out as all zeros instead of triggering a division by zero.

    Returns the normalised data as a list of lists; an empty input yields
    an empty list.
    """
    x1 = np.array(X, dtype=float)
    if x1.size == 0:
        #nothing to normalise (the original indexed X[0] and crashed here)
        return x1.tolist()
    mean = x1.mean(axis=0)
    std = x1.std(axis=0)
    #avoid dividing by zero for constant columns
    std[std == 0.0] = 1.0
    return ((x1 - mean) / std).tolist()
def getData(srcF, isTrain=True, addBias=True, normalize=True):
    """Load a comma-separated data file into feature and label arrays.

    srcF      -- path of the CSV file to read.
    isTrain   -- when True the last field of each row is the integer class
                 label and is excluded from the features; when False every
                 field is a feature and the returned label array is empty.
    addBias   -- prepend a constant 1.0 column to the features.
    normalize -- standardise the feature columns with doNormalize.

    Returns (X, y) as numpy arrays. A label of 1 is encoded as [1,0] and any
    other label as [0,1].

    The bias column is added AFTER normalisation: standardising a constant
    column of ones maps it to all zeros, which silently disabled the bias.
    """
    features, labels = [], []
    #newline='' is what the csv module expects for files it reads
    with open(srcF, newline='') as src:
        for row in csv.reader(src, delimiter=','):
            if not row:
                #blank line in the file; nothing to parse
                continue
            cells = row[:-1] if isTrain else row
            #correct data type
            features.append([float(c) for c in cells])
            if isTrain:
                labels.append([1,0] if int(row[-1]) == 1 else [0,1])
    if normalize and features:
        # NOTE(review): each file is normalised with its own statistics, so
        # train and test data are scaled independently -- confirm intended.
        features = doNormalize(features)
    if addBias:
        features = [[1.0] + list(r) for r in features]
    return (np.array(features), np.array(labels))
def sigmoid(v):
    """Logistic function 1/(1+e^-v); works element-wise on numpy arrays."""
    decay = np.exp(-v)
    return 1.0/(decay+1)
def sigmoidDiff(v):
    """Derivative of the logistic function at v, i.e. s*(1-s) with s = sigmoid(v)."""
    s = sigmoid(v)
    return s*(1-s)
def feedforward(model, X):
    """Run a batch of samples through the two-layer network.

    model -- dict holding the weight matrices under 'w1' and 'w2'.
    X     -- 2-D array with one sample per row; its column count must match
             the row count of w1.

    Returns the sigmoid activations of the output layer, one row per sample.
    """
    w1 = model['w1']
    w2 = model['w2']
    #hidden layer activations, one row per sample
    hidden = sigmoid(X.dot(w1))
    #prepend the constant bias unit as column 0
    hidden = np.insert(hidden, 0, 1.0, axis=1)
    #output layer activations
    return sigmoid(hidden.dot(w2))
def restrictProb(a):
    """Clamp a into [1e-15, 1-1e-15] so that log(a) and log(1-a) stay finite."""
    lower = 1e-15
    upper = 1-1e-15
    floored = max(a, lower)
    return min(floored, upper)
def cost(model, X, y):
    """Regularised cross-entropy loss of the network on (X, y).

    model -- dict with the weight matrices 'w1', 'w2' and the
             regularisation strength 'lambda'.
    X     -- 2-D array of samples, one per row.
    y     -- 0/1 target array with the same shape as the network output.

    Returns the mean cross-entropy over the samples plus
    lambda/(2m) times the sum of all squared weights.

    Raises ValueError when X contains no samples.
    """
    m = X.shape[0]
    if m == 0:
        raise ValueError("cost() needs at least one sample")
    h = feedforward(model, X)
    #keep probabilities away from exactly 0 and 1 so both logs stay finite;
    #np.clip applies the same bounds as restrictProb without a Python-level loop
    py = np.clip(h, 1e-15, 1-1e-15)
    loss = -(1.0/m)*np.sum(y*np.log(py)+(1-y)*np.log(1-py)) #mx2 .* mx2
    #regularize
    w1,w2 = model['w1'],model['w2']
    loss += model['lambda']*(np.sum(np.square(w1)) + np.sum(np.square(w2)))/(2*m)
    return loss
def predict(model, x):
    """Classify a single sample.

    model -- dict holding the weight matrices under 'w1' and 'w2'.
    x     -- 1-D feature vector whose length matches the row count of w1.

    Returns 1 minus the index of the strongest output unit: with two output
    units that is 1 when unit 0 wins and 0 when unit 1 wins, matching the
    [1,0] / [0,1] label encoding produced by getData.
    """
    w1 = model['w1']
    w2 = model['w2']
    #hidden activations with the bias unit placed in front
    hidden = np.insert(sigmoid(x.dot(w1)), 0, 1)
    scores = sigmoid(hidden.dot(w2))
    winner = np.argmax(scores)
    return 1-winner
def fit(model, X, y, passes=1000):
    """Train the two-layer network with full-batch gradient descent.

    model  -- dict with weight matrices 'w1' and 'w2', the learning rate
              'eta' and the regularisation strength 'lambda'. The weight
              arrays are updated in place.
    X      -- 2-D array of samples, one per row (bias column included).
    y      -- 0/1 target array, one row per sample.
    passes -- number of gradient steps.

    Prints the pass index and the current cost about ten times during
    training and returns the updated model.
    """
    m = X.shape[0]
    w1,w2 = model['w1'],model['w2']
    #integer reporting interval; the float modulus i % (passes/10) misfires
    #when passes is small or not a multiple of ten
    report_every = max(1, passes//10)
    for i in range(passes):
        #forward pass
        a1 = sigmoid(X.dot(w1)) #m x lh
        a2 = np.insert(a1,0,1.0,axis=1) #m x (lh+1), bias unit in column 0
        h = sigmoid(a2.dot(w2)) #m x lo
        #backpropagation
        del3 = h-y #m x lo
        #The error must flow back through the TRANSPOSE of w2; reshape(lo,lh+1)
        #has the right shape but scrambles which weight meets which unit.
        #Row 0 of w2 (the bias weights) is skipped since the bias unit has no
        #incoming connections. a1*(1-a1) is the sigmoid derivative at z1.
        del2 = del3.dot(w2[1:].T)*a1*(1-a1) #m x lh
        dw1 = X.T.dot(del2) #same shape as w1
        dw2 = a2.T.dot(del3) #same shape as w2
        # NOTE(review): the data gradients are sums over the samples while the
        # regularisation term is divided by m (cost() divides both by m).
        # Left unchanged because 'eta' may have been tuned against this scale.
        dw1 += (model['lambda']/m)*w1
        dw2 += (model['lambda']/m)*w2
        w1 += -model['eta']*dw1
        w2 += -model['eta']*dw2
        model['w1'] = w1
        model['w2'] = w2
        if i % report_every == 0:
            print(i,cost(model, X, y))
    return model
if __name__ == "__main__":
    #Train on Train.csv, classify TestX.csv and write the labels to answer.txt.
    np.random.seed(47) #fixed seed so runs are reproducible
    model = {'li':57,'lh':85,'lo':2,'lambda':0.05,'eta':0.01}
    #uniform initial weights in [-0.12, 0.12); the extra row in each matrix
    #is for the bias unit. (An earlier variant used randn scaled by
    #1/sqrt(fan-in) instead.)
    model['w1'] = np.random.rand(model['li']+1, model['lh'])*0.24 - 0.12
    model['w2'] = np.random.rand(model['lh']+1, model['lo'])*0.24 - 0.12
    X,y = getData("Train.csv")
    #the test file carries no labels, so the second return value is empty
    tX,_ = getData("TestX.csv",isTrain=False)
    model = fit(model, X, y)
    py = [predict(model, np.array(row)) for row in tX]
    #newline='' lets the csv module control line endings itself
    with open("answer.txt","w",newline='') as wfile:
        writer = csv.writer(wfile)
        writer.writerow(['Id','Label'])
        for i,ans in enumerate(py):
            writer.writerow([i,ans])
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment