modified model and corrected loss calculation

bf87ab8c · Sushant Mahajan · c90c56f0 · bf87ab8c · bf87ab8c · bf87ab8c
Commit bf87ab8c authored Apr 10, 2016 by Sushant Mahajan
Expand all Hide whitespace changes
Inline Side-by-side

Showing with 1805 additions and 11 deletions

answer.txt answer.txt +1601 -0

model.py model.py +43 -11

model2.py model2.py +161 -0

No files found.
--- a/answer.txt
+++ b/answer.txt
--- a/model.py
+++ b/model.py
@@ -2,7 +2,7 @@
 import sys
 import os
 import csv
-from random import random
+from random import seed, random
 from pprint import pprint as pp
 from math import log, exp
 import numpy as np
@@ -45,7 +45,7 @@ def sigmoid(v):
    return 1.0/(1+exp(-v))

 def sigmoidGradient(v):
-    return [a*b for a,b in zip([sigmoid(x) for x in v], [sigmoid(x) for x in v])]
+    # Element-wise derivative of the logistic function for every x in the
+    # iterable v: sigmoid(x) * (1 - sigmoid(x)). The removed line multiplied
+    # sigmoid(x) by itself instead.
+    return [a*b for a,b in zip([sigmoid(x) for x in v], [1-sigmoid(x) for x in v])]

 def regularization(cost, w1, w2, lamb, m):
    reg = sum(w1*w1)+sum(w2*w2)
@@ -104,34 +104,66 @@ def cost(li, lh, lo, weights, X, y, lamb):

    return J,grad

-def fit(X, y, li, lh, lo, weights, lamb, eta, passes=10000, verbose=True):
+def fit(X, y, li, lh, lo, weight, lamb, eta, passes=1000, verbose=True):
+    # Runs `passes` gradient-descent steps of size eta, starting from a copy
+    # of `weight` (the caller's array is not modified), and returns the
+    # updated weights. cost() returns the pair (J, gradient).
+    weights = np.copy(weight)
     for i in range(1,passes+1):
         J, dw = cost(li, lh, lo, weights, X, y, lamb)
         #print(weights.shape, dw.shape)
         weights += -eta*dw
         print(i,"\r", end='')
-        if verbose and i%1000 == 0:
+        # NOTE(review): passes/10 is a float, so the cost is printed only
+        # when i is an exact multiple of that value.
+        if verbose and i%(passes/10) == 0:
             print()
             print(J)
+    return weights

 def predict(x, w1, w2):
-    x=[1]+x    #58x1
+    # Forward pass for a single sample. Returns the raw sigmoid output h2
+    # rather than the thresholded 0/1 label the removed lines returned.
+    # NOTE(review): the bias term is no longer prepended here, so x
+    # presumably already contains it -- confirm against getData.
+    #x=[1]+x    #58x1    
     x = np.array(x)
-    h1 = sigmoid(np.dot(w1,x).tolist())    #28x58 * 58x1 = 28x1
+    h1 = [sigmoid(z) for z in np.dot(w1,x).tolist()]    #28x58 * 58x1 = 28x1
     h1 = [1]+h1
-    h2 = sigmoid(np.dot(w2,h1).tolist())    #1x29 * 29x1 = 1x1
-    return 1 if h2>0.5 else 0
-    
+    h2 = sigmoid(np.dot(w2,h1).tolist()[0])    #1x29 * 29x1 = 1x1
+    return h2
+
+def setWeightsFromFile(weights):
+    """Load previously saved weights from the file "weights" into `weights`.
+
+    The values are copied into the array passed by the caller (in place), so
+    the caller sees them after the call. Returns True when the file existed
+    and was loaded, False when there is no such file (`weights` is then left
+    untouched). Raises ValueError if the stored array cannot be broadcast
+    into the shape of `weights`, e.g. a stale file from another layer layout.
+    """
+    if not os.path.isfile("weights"):
+        return False
+    with open("weights","rb") as wfile:
+        # Slice-assign instead of rebinding the local name: rebinding only
+        # changed the local variable and silently discarded the loaded data.
+        weights[...] = np.load(wfile)
+    return True
+
    
 if __name__ == "__main__":
+    # Seed NumPy's generator so the random initial weights are reproducible.
+    # (np.random.rand(47) only drew 47 numbers and threw them away.)
+    np.random.seed(47)
    X,y = getData(params["train"])
    tX,ty = getData(params["test"], isTrain=False)
    # print(len(X), len(X[0]), len(y), X[0])
    # print(len(tX), len(ty), tX[0])
    li,lh,lo = tuple(params["layers"])
-    weights = np.array([random() for _ in range(lh*(li+1)+lo*(lh+1))])
+    weights = np.random.rand(lh*(li+1)+lo*(lh+1))
    lamb,eta = 0.1,0.1
-    fit(X, y, li, lh, lo, weights, lamb, eta)
+    # Train only when no cached "weights" file is available, then cache.
+    if not setWeightsFromFile(weights):
+        weights = fit(X, y, li, lh, lo, weights, lamb, eta, passes=300)
+        with open("weights","wb") as wfile:
+            np.save(wfile, weights)
+    # Split the flat parameter vector into the two layer matrices.
+    w1 = weights[:(li+1)*lh].reshape(lh,li+1)    #28x58
+    w2 = weights[(li+1)*lh:].reshape(lo,lh+1)    #1x29
+    py = []
+    for x in X:
+        py.append(predict(x,w1,w2))
+    print(py)
+    # print("train accuracy", len(list(filter(lambda z:z==0,[a-b for a,b in zip(py,y)])))*1.0/len(y))
+
+    # pty = []
+    # for x in tX:
+    #     pty.append(predict(x,w1,w2))
+
+    # with open("answer.txt","w") as dest:
+    #     writer = csv.writer(dest)
+    #     for i,ans in enumerate(pty):
+    #         writer.writerow([i,ans])
+
+
+
    # J,grad = cost(li, lh, lo, weights, X, y, 0.1)
    # print(J,grad)
    #print(len(w1), len(w1[0]), len(w2), len(w2[0]))
\ No newline at end of file
--- a/model2.py
+++ b/model2.py
+#! /usr/bin/env python3
+import sys
+import os
+import csv
+from random import seed, random
+from pprint import pprint as pp
+from math import log, exp
+import numpy as np
+
+def doNormalize(X):
+    """Standardize every column of X to zero mean and unit standard deviation.
+
+    X is a rectangular sequence of numeric rows. A column whose standard
+    deviation is 0 is only mean-centred (divided by 1.0), so a constant
+    column comes out as all zeros. Returns a new list of lists; an empty X
+    yields an empty list instead of raising IndexError.
+    """
+    x1 = np.array(X,dtype=float)
+    # Each row of the transpose is a view onto one column of x1, so the
+    # in-place updates below write straight into x1.
+    for col in x1.T:
+        mean,std = col.mean(),col.std()
+        col -= mean
+        col /= std if std!=0.0 else 1.0
+    return x1.tolist()
+
+def getData(srcF, isTrain=True, addBias=True, normalize=True):
+    """Read a comma-separated file into a feature matrix and one-hot labels.
+
+    srcF      -- path of the CSV file; every field must parse as a number.
+    isTrain   -- when True the last field of each row is the integer label
+                 and is excluded from the features; label 1 is encoded as
+                 [1,0], any other value as [0,1].
+    addBias   -- when True a constant 1.0 is prepended to every feature row.
+    normalize -- when True the features are passed through doNormalize().
+
+    Returns (X, y) as NumPy arrays; y is empty when isTrain is False.
+
+    The bias column is added AFTER normalization. Normalizing it along with
+    the features turned the constant column into all zeros (std 0 -> value
+    minus mean), which removed the bias input altogether.
+    NOTE(review): each file is normalized with its own statistics, so train
+    and test data are scaled independently -- confirm this is intended.
+    """
+    X,y=[],[]
+    with open(srcF) as src:
+        for row in csv.reader(src, delimiter=','):
+            features = row[:-1] if isTrain else row
+            #correct data type
+            X.append([float(v) for v in features])
+
+            if isTrain:
+                y.append([1,0] if int(row[-1])==1 else [0,1])
+
+    if normalize and X:
+        X = doNormalize(X)
+    if addBias:
+        X = [[1.0]+list(r) for r in X]
+    return (np.array(X),np.array(y))
+
+def sigmoid(v):
+    """Logistic function 1 / (1 + e^-v); element-wise for NumPy arrays."""
+    denominator = 1+np.exp(-v)
+    return 1.0/denominator
+
+def sigmoidDiff(v):
+    """Derivative of the logistic function at v: s * (1 - s), s = sigmoid(v)."""
+    s = sigmoid(v)
+    return s*(1-s)
+
+def feedforward(model, X):
+    """Run the forward pass of the two-layer network over all rows of X.
+
+    Reads the weight matrices model['w1'] and model['w2'] and returns the
+    sigmoid activations of the output layer, one row per input row.
+    Assumes X is 2-D with the bias column already present -- TODO confirm.
+    """
+    w1,w2 = model['w1'],model['w2'] #58x28, 29x2
+    hidden = sigmoid(X.dot(w1))    #mx58 * 58x28 = mx28
+    # prepend a column of ones: the bias unit of the hidden layer
+    hidden = np.insert(hidden,0,1.0,axis=1)  #mx29
+    return sigmoid(hidden.dot(w2)) #mx29 * 29x2 = mx2
+
+def restrictProb(a):
+    """Clamp a into the closed interval [1e-15, 1-1e-15]."""
+    lower,upper = 1e-15,1-1e-15
+    floored = max(a,lower)
+    return min(floored,upper)
+
+def cost(model, X, y):
+    """Regularized cross-entropy loss of the network on (X, y).
+
+    model -- dict holding the weights 'w1', 'w2' and the factor 'lambda'.
+    X     -- input matrix, one sample per row (m rows).
+    y     -- target matrix with the same shape as the network output.
+
+    Returns the mean cross-entropy over the m samples plus
+    lambda/(2m) times the sum of all squared weights.
+    """
+    m = X.shape[0]
+    h = feedforward(model, X)
+
+    # Keep probabilities strictly inside (0, 1) so both logarithms are
+    # finite. np.clip applies the same bounds as restrictProb, element-wise,
+    # without a Python-level call per entry.
+    py = np.clip(h,1e-15,1-1e-15)
+
+    loss = -(1.0/m)*np.sum(y*np.log(py)+(1-y)*np.log(1-py))  #mx2 .* mx2
+    #regularize
+    w1,w2 = model['w1'],model['w2']
+    loss += model['lambda']*(np.sum(np.square(w1)) + np.sum(np.square(w2)))/(2*m)
+    return loss
+
+def predict(model, x):
+    """Classify one sample with the weights stored in `model`.
+
+    Returns 1 minus the index of the largest output activation, i.e. 1 when
+    the first output unit is the largest and 0 when the second one is.
+    """
+    w1,w2 = model['w1'],model['w2']
+    # hidden activations with the bias unit (a leading 1) prepended
+    hidden = np.insert(sigmoid(x.dot(w1)),0,1)  #1x58 * 58x28 -> 1x29
+    output = sigmoid(hidden.dot(w2))  #1x29 x 29x2
+    return 1-np.argmax(output)
+
+
+def fit(model, X, y, passes=1000):
+    """Train the two-layer network in `model` with batch gradient descent.
+
+    model  -- dict with weights 'w1' and 'w2', regularization factor
+              'lambda' and step size 'eta'. The weight arrays are updated
+              in place.
+    X, y   -- input matrix (one sample per row) and target matrix.
+    passes -- number of full-batch update steps.
+
+    Prints the step index and the current cost about ten times per run and
+    returns `model`.
+    """
+    m = X.shape[0]
+    w1,w2 = model['w1'],model['w2'] #58x28, 29x2
+    # Integer reporting interval; passes/10 was a float and misbehaved for
+    # pass counts that are not a multiple of 10.
+    report_every = max(1, passes//10)
+
+    for i in range(passes):
+        #forward pass
+        z1 = X.dot(w1)  #mx58 * 58x28 = mx28
+        a2 = sigmoid(z1)    #mx28
+        a2 = np.insert(a2,0,np.ones(a2.shape[0]),axis=1)  #mx29
+        z2 = a2.dot(w2) #mx29 * 29x2
+        h = sigmoid(z2) #mx2
+
+        #backpropagation
+        del3 = h-y  #mx2
+        # Propagate the output error through the TRANSPOSE of w2. The former
+        # w2.reshape(lo,lh+1) has the right shape but reorders the entries,
+        # pairing each error with the wrong weights. Row 0 of w2 belongs to
+        # the hidden bias unit, which has no incoming weights, so it is
+        # skipped.
+        del2 = del3.dot(w2[1:].T)*sigmoidDiff(z1)   #mx2 * 2x28 .* mx28 = mx28
+        dw1 = np.dot(X.T,del2)    #58xm*mx28=58x28
+        dw2 = (a2.T).dot(del3)    #29xm*mx2=29x2
+
+        # NOTE(review): the data gradients above are summed over the samples
+        # while cost() averages by m; confirm eta is tuned for that scale.
+        dw1 += (model['lambda']/m)*w1
+        dw2 += (model['lambda']/m)*w2
+
+        w1 += -model['eta']*dw1
+        w2 += -model['eta']*dw2
+
+        model['w1'] = w1
+        model['w2'] = w2
+
+        if i % report_every==0:
+            print(i,cost(model, X, y))
+
+    return model
+
+
+if __name__ == "__main__":
+    # Script entry point: train on Train.csv, label TestX.csv and write the
+    # predictions to answer.txt as "Id,Label" rows.
+    np.random.seed(47)
+    model = {'li':57,'lh':85,'lo':2,'lambda':0.05,'eta':0.01}
+    # Uniform initial weights in [-0.12, 0.12). An alternative that was
+    # tried is randn(...) scaled by 1/sqrt(fan_in).
+    model['w1'] = np.random.rand(model['li']+1, model['lh'])*0.24 - 0.12
+    model['w2'] = np.random.rand(model['lh']+1, model['lo'])*0.24 - 0.12
+
+    X,y = getData("Train.csv")
+    tX,_ = getData("TestX.csv",isTrain=False)
+
+    model = fit(model, X, y)
+
+    py = [predict(model, np.array(row)) for row in tX]
+
+    # newline='' lets the csv module control line endings, which avoids
+    # blank lines between rows on platforms that translate "\n".
+    with open("answer.txt","w",newline='') as wfile:
+        writer = csv.writer(wfile)
+        writer.writerow(['Id','Label'])
+        for i,ans in enumerate(py):
+            writer.writerow([i,ans])
\ No newline at end of file