Commit d6cc2051 authored by Tarun

commitfinal

Ayush Gupta 203050034
Tarun Saurabh 203050009
Ankit Gaur 203050081
Abhijeet Pratap Singh 203059001
#CREDIT
Dataset: National Institute of Standards and Technology (NIST), US Department of Commerce

TO RUN THIS PROJECT:
Go to https://colab.research.google.com/ and upload the notebook ProjectFMLbasic.py.
Upload the dataset https://s3.amazonaws.com/nist-srd/SD19/by_class.zip to your Google Drive and mount the drive in Colab.
Preprocess the data using preprocessing_notes.py, then run ProjectFMLbasic.py.
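A minimal sketch of that Colab setup, for reference; the Drive path below is an assumption, so point it at wherever you actually uploaded the archive:

# Colab setup sketch. The Drive path is an assumption; adjust it to your own layout.
from google.colab import drive
from zipfile import ZipFile

drive.mount('/content/drive')                        # authorize and mount Google Drive

dataset_zip = '/content/drive/MyDrive/by_class.zip'  # assumed upload location of the SD19 archive
with ZipFile(dataset_zip, 'r') as zf:
    zf.extractall('/content/')                       # unpacks the archive under /content/ (typically /content/by_class/)

# After extraction, run preprocessing_notes.py and then ProjectFMLbasic.py.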
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import csv
import xmltodict
import numpy as np
from os import listdir
from os.path import isfile, join
import cv2
# In[ ]:
class data:
def __init__(self):
self.file_name = 'name_string.csv'
self.set_bag_of_words = set()  # filled with every distinct word during extraction
def extract_word(self, xml_dir='xml'):
    # dir_1 = 'formsA-D'
    # dir_2 = 'formsE-H'
    # dir_3 = 'formsI-Z'
    list_of_pairs = []
    files = [f for f in listdir(xml_dir) if isfile(join(xml_dir, f))]
    for x in range(len(files)):
        with open(join(xml_dir, files[x]), 'r') as f:
            data = xmltodict.parse(f.read())
for i in range(len(data['form']['handwritten-part']['line'])):
num = len(data['form']['handwritten-part']['line'][i]['word'])
for j in range(num):
    try:
        word = data['form']['handwritten-part']['line'][i]['word'][j]['@text']
        # file name of the word image
        img_filename = data['form']['handwritten-part']['line'][i]['word'][j]['@id']
    except (KeyError, TypeError):
        # a line with only one word is parsed as a dict rather than a list
        word = data['form']['handwritten-part']['line'][i]['word']['@text']
        # file name of the word image
        img_filename = data['form']['handwritten-part']['line'][i]['word']['@id']
temp = img_filename.split('-')
path = 'words'+'/'+temp[0]+'/'+temp[0]+'-'+ temp[1]+'/'+img_filename+'.png'
# if ord(img_filename[0]) < ord('e'):
# path = dir_1 + '/' + img_filename + '.png'
# elif ord(img_filename[0]) < ord('i'):
# path = dir_2 + '/' + img_filename + '.png'
# else:
# path = dir_3 + '/' + img_filename + '.png'
list_of_pairs.append([path, word])
self.set_bag_of_words.add(word)
with open(self.file_name,'w', newline='') as csvfile:
writer = csv.writer(csvfile)
writer.writerow(["image_path", "word"])
for r in range(len(list_of_pairs)):
writer.writerow(list_of_pairs[r])
# In[ ]:
d1 = data()
d1.extract_word('xml')
# In[ ]:
len(d1.set_bag_of_words)
# In[ ]:
with open('bag_of_words.txt', 'w') as f:
for item in list(d1.set_bag_of_words):
f.write("%s\n" % item)
# In[ ]:
# In[4]:
import cv2
with open('bag_of_words.txt', 'r') as f:
data = f.read()
# In[5]:
d = data.split('\n')[:-2]
# In[ ]:
# In[ ]:
# scratch cell: inspect one word image path (requires `row` from the CSV-reading cell below)
# row[0]
# with open(row[0], 'r') as file:
#     a = file.read()
# In[6]:
with open('name_string.csv', 'r') as file:
reader = csv.reader(file)
for row in reader:
print(row)
# In[9]:
bag_of_words = set()
with open('name_string.csv', 'r') as file:
    reader = csv.reader(file)
    next(reader)  # skip the "image_path,word" header row
    for row in reader:
        bag_of_words.add(row[1])
# In[10]:
bag = list(bag_of_words)
with open('bag_of_words.txt', 'w') as f:
for item in bag:
f.write("%s\n" % item)
# In[ ]:
# In[45]:
import os
import shutil
root_path = 'dataset_classwise_0/'
folders = bag
# keep exactly 20 samples per word class: drop classes with fewer than 20
# images and delete the surplus images from classes with more than 20
for folder in folders:
    if folder != '/':
        try:
            files_in_class = os.listdir(root_path + folder)
            n = len(files_in_class)
            if n < 20:
                print(root_path + folder)
                shutil.rmtree(root_path + folder + '/')
            elif n > 20:
                r = n - 20
                for i in range(r):
                    pt = root_path + folder + '/' + files_in_class[i]
                    os.remove(pt)
                    print(n, pt)
        except FileNotFoundError:
            pass  # no folder was created for this word
# In[17]:
folders = bag
for folder in folders:
pt = folder+'/'+os.listdir(root_path + "/" + folder)[0]
print(pt)
# In[27]:
sample_path = 'about.png'
print(sample_path)
image = cv2.imread(sample_path)
print(image)
cv2.imshow('image', image)
cv2.waitKey(0)
cv2.destroyAllWindows()
# In[29]:
import shutil, os
with open('name_string.csv', 'r') as file:
reader = csv.reader(file)
first_row = next(reader)
for row in reader:
if row[1] != '/':
src = row[0]
dst = 'dataset_classwise/' + row[1]+'/'
shutil.copy(src, dst)
# In[8]:
# Save image in set directory
# Read RGB image
with open('name_string.csv', 'r') as file:
reader = csv.reader(file)
first_row = next(reader)
for row in reader:
try:
img = cv2.imread(row[0])
# print('/'+row[0])
img_grey = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Output img with window name as 'image'
r, c = img_grey.shape
ratio = c/r
new_row = 32
new_col = c
if r>32:
new_col = c - int((r-32)*(ratio))
if new_col > 128:
new_col = 128
resized = cv2.resize(img_grey, (new_col,new_row), interpolation = cv2.INTER_AREA)
rw,cl = resized.shape
image = cv2.copyMakeBorder(resized,0 , 0, 0, 128-cl, cv2.BORDER_CONSTANT, value = [255,255])
#print(image.shape)
filename = 'Words/'+row[0][6:]
cv2.imwrite(filename, image)
#cv2.imshow('image', image)
# Keep the output window open until
# the user presses a key
#cv2.waitKey(0)
# Destroying present windows on screen
#cv2.destroyAllWindows()
except:
print('error at', row)
# In[ ]:
# In[ ]:
## display the word images one by one (no thresholding is applied in this helper)
def image_threshold():
    with open('name_string.csv', 'r') as file:
        reader = csv.reader(file)
        for row in reader:
            img = cv2.imread(row[0])
            print('/' + row[0])
            # Output img with window name as 'image'
            cv2.imshow('image', img)
            # Keep the window open until the user presses a key
            cv2.waitKey(0)
            # Destroy open windows
            cv2.destroyAllWindows()
# In[18]:
import os
root = os.listdir('by_class/')
mi = 99999
for i in range(len(root)):
letter_dir = os.listdir('by_class/'+root[i])
if mi > len(letter_dir):
mi = len(letter_dir)
print(mi)
# In[19]:
root = os.listdir('by_class/')
print(root)
mi = 0
for i in range(len(root)):
letter_dir = os.listdir('by_class/'+root[i])
print(letter_dir)
for j in range(len(letter_dir) - 1800):
path = 'by_class/'+root[i] +'/'+letter_dir[j]
os.remove(path)
# In[2]:
import cv2
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
img1 = cv2.imread('1.png',cv2.IMREAD_GRAYSCALE)
img2 = cv2.imread('2.png',cv2.IMREAD_GRAYSCALE)
img3 = cv2.imread('3.png',cv2.IMREAD_GRAYSCALE)
img4 = cv2.imread('4.png',cv2.IMREAD_GRAYSCALE)
(thresh, im_bw1) = cv2.threshold(img1, 128, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
(thresh, im_bw2) = cv2.threshold(img2, 128, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
(thresh, im_bw3) = cv2.threshold(img3, 128, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
(thresh, im_bw4) = cv2.threshold(img4, 128, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
# In[3]:
imgplot = plt.imshow(im_bw2/255)
# In[ ]:
# In[4]:
# find candidate cut columns (zero-projection columns) for letter segmentation
def segment_into_letters(img):
im_bw2 = img/255
histo = img.sum(axis=0)
fig = plt.figure()
ax = fig.add_axes([0,0,1,1])
ax.bar(range(0,len(histo)),histo)
plt.show()
window = np.ones(4)
sc = []
#convolution
for i in range(len(histo)-len(window)):
summation = 0
for j in range(len(window)):
summation += histo[i+j]*window[j]
if summation == 0:#if noisy then adjust accordingly
sc.append(i+1)
return(sc)
# In[5]:
new_histo = segment_into_letters(im_bw2)
fig = plt.figure()
ax = fig.add_axes([0,0,1,1])
ax.bar(range(0,len(new_histo)),new_histo)
plt.show()
# In[6]:
# finding the differentiation with a Sobel-like operator in x
def differentiate(X):
    # convolve with the differential operator [-1, 0, 1]
    d = [-1, 0, 1]
    pos_of_highest_change = []
for i in range(len(X) - 3):
summation = 0
for j in range(len(d)):
summation += X[i+j]*d[j]
if summation > 2:
pos_of_highest_change.append(X[i])
print(X[i-1])
pos_of_highest_change.append(pos_of_highest_change[-1]+1)
pos_of_highest_change.append(X[-1])
return(pos_of_highest_change)
# In[7]:
pixel = differentiate(new_histo)
fig = plt.figure()
ax = fig.add_axes([0,0,1,1])
ax.bar(range(0,len(pixel)),pixel)
plt.show()
pixel
# In[8]:
imgplot = plt.imshow(im_bw2/255)
new_histo
# In[10]:
letter = im_bw2/255
i = 0
list_of_letters = []
while(i<len(pixel)-1):
imgplot = plt.imshow(letter[:,pixel[i]:pixel[i+1]])
list_of_letters.append(letter[:,pixel[i]:pixel[i+1]])
print(pixel[i],pixel[i+1])
plt.show()
i = i+2
# In[84]:
# crop, resize, and pad a character image to 64x64
def char_preprocess(img):
    # row-wise projection (sum over each row)
    vertical_projection = np.sum(img, axis=1)
    # find the first and last non-zero rows
left_index = 0
right_index = 0
for i in range(len(vertical_projection)):
if (vertical_projection[i] != 0):
left_index = i - 1
break
for j in range(len(vertical_projection)-1,0,-1):
if (vertical_projection[j] != 0):
right_index = j + 1
break
#crop image
img_cropped = img[left_index: right_index,:]
#resize image
img_cropped_resized = cv2.resize(img_cropped, (int(img_cropped.shape[1]*.7), int(img_cropped.shape[0]*.7)), interpolation = cv2.INTER_AREA)
imgplot = plt.imshow(img_cropped_resized)
plt.show()
# pad the border to fit a 64x64 canvas
h = img_cropped_resized.shape[0]
w = img_cropped_resized.shape[1]
thickness_h = int((64-h)/2)
thickness_w = int((64-w)/2)
print(thickness_h,thickness_w,h,w)
normalized_img = cv2.copyMakeBorder(img_cropped_resized, thickness_h, thickness_h, thickness_w,thickness_w, cv2.BORDER_CONSTANT, value = [0,0])
return(normalized_img)
# In[94]:
image_normalized = char_preprocess(list_of_letters[2])
# In[95]:
imgplot = plt.imshow(image_normalized)
plt.show()
# In[93]:
imgplot = plt.imshow(cv2.resize(list_of_letters[2], (64,64), interpolation = cv2.INTER_AREA))
plt.show()
# In[103]:
# import os
# classes = os.listdir('by_class/')
# char = ['A', 'Q','u','i','c','k','B','r','o','w','n','F','o','x','J','u','m','p','s','O','v','e','r','T','h','e','L','a','z','y','D','o','g']
# l = []
# for each_char in char:
# list_of_char = os.listdir('by_class/'+each_char+'/')
# img = plt.imread('by_class/'+each_char+'/'+list_of_char[50],)
# l.append(img)
# r = plt.imshow(img)
# plt.show()
# In[104]:
# In[121]:
# stich = np.zeros([128,1,3])
# print(l[1].shape)
# for i in range(33):
# stich = np.append(stich,l[i],axis=1)
# stich.shape
# In[123]:
# plt.imshow(stich)
# plt.savefig('stich.png')
# In[ ]:
# -*- coding: utf-8 -*-
"""ProjectFMLbasic.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1icJA6QWGWqOp8l8PLIqGI99uUdBfbf5P
Unzipping the dataset
"""
from google.colab import drive
drive.mount('/content/drive')  # mount Drive so the dataset zip below is readable
# importing required modules
from zipfile import ZipFile
# specifying the zip file name
file_name = "/content/drive/MyDrive/words/Copy of final_dataset_1800.zip"
# opening the zip file in READ mode
with ZipFile(file_name, 'r') as zip:
# printing all the contents of the zip file
zip.printdir()
# extracting all the files
# print('Extracting all the files now...')
zip.extractall()
print('Done!')
"""Training of model
**STARTS_HERE**
"""
import csv
import cv2
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as im
from tensorflow.keras import datasets, layers, models
from keras.preprocessing.image import img_to_array, load_img
root_path = '/content/by_class/'
classes = os.listdir(root_path)
#print(len(classes))
x_train = []
y_train = []
y_train_dict = {}
index = 0
for each_class in classes[10:]:
    #print(type(each_class))
    # digits are excluded: the model is trained on the letter classes only
    if each_class not in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']:
        # use the first 1000 photos of each class for training
curr_path = root_path + each_class
curr_dir = os.listdir(curr_path)
for image in curr_dir[:1000]:
curr_image = curr_path + '/' + image
image = cv2.imread(curr_image, cv2.IMREAD_GRAYSCALE)
ret,new_img = cv2.threshold(image,200,255,cv2.THRESH_BINARY_INV)
new_img = cv2.resize(new_img, (128, 128))
x_train.append(new_img)
y_train.append(index)
y_train_dict[index] = each_class
index += 1
print('Done...')
x_train = np.array(x_train)
photos = x_train.shape[0]
x_train = x_train.reshape(photos ,128,128,1)
y_train = np.array(y_train)
#print(x_train[0].shape)
print('Done...')
from tensorflow.keras import datasets, layers, models
from keras.layers import Dense, Dropout, Flatten, Lambda, ELU, Activation, BatchNormalization
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 1)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(62 , activation='softmax'))
#model.summary()
print('Done...')
from tensorflow.keras import losses
from tensorflow import keras
model.compile(
optimizer=keras.optimizers.RMSprop(), # Optimizer
# Loss function to minimize
loss=keras.losses.SparseCategoricalCrossentropy(),
# List of metrics to monitor
metrics=[keras.metrics.SparseCategoricalAccuracy()],
)
model.fit(x_train , y_train , epochs=10)
image = cv2.imread('/content/by_class/1/train_31_00003.png', cv2.IMREAD_GRAYSCALE)
ret,new_img = cv2.threshold(image,200,255,cv2.THRESH_BINARY_INV)
test_img = cv2.resize(new_img, (128,128))
plt.imshow(test_img , cmap='gray')
plt.show()
test = np.zeros((1,128,128,1))
test[0] += test_img.reshape(128,128,1)
pred = model.predict(test)
index = np.argmax(pred)
print(y_train_dict[index])
"""**Line segmentation**"""
image = cv2.imread('/content/drive/MyDrive/test5.jpeg', cv2.IMREAD_GRAYSCALE)
ret,new_img = cv2.threshold(image,200,255,cv2.THRESH_BINARY_INV)
new_img = cv2.copyMakeBorder(new_img, 10, 10, 10, 10, cv2.BORDER_CONSTANT , value = 0)
horizontal_histogram = np.zeros((new_img.shape[0] , 1))
print(horizontal_histogram.shape)
for index in range(horizontal_histogram.shape[0]):
    horizontal_histogram[index] = np.sum(new_img[index][:])
#print(horizontal_histogram)
index = 0
SC_list = []
SC = 0
even_odd = 0
# a row belongs to a text line if it contains at least one ink pixel (threshold of 1 pixel)
for index in range(horizontal_histogram.shape[0]):
if horizontal_histogram[index] > 0 and ( even_odd % 2 == 0 ):
SC = np.mean(np.arange(SC,index-1))
SC_list.append(SC)
even_odd += 1
elif horizontal_histogram[index] == 0 and (even_odd % 2 == 1):
SC = index
even_odd += 1
SC = np.mean(np.arange(SC,index-1))
SC_list.append(SC)
SC_list = np.ceil(SC_list)
#for each in SC_list:
# new_img[int(each)][:] = 255
#new_img = cv2.resize(new_img, (64, 64))
plt.imshow(new_img , cmap='gray')
print(np.ceil(SC_list))
"""**Slicing of the image line-wise**"""
lines = []
counter = 0
limit = len(SC_list)
while counter < limit-1:
iin = int(SC_list[counter])
fin = int(SC_list[counter+1])
if fin - iin > 75:
lines.append(new_img[iin:fin+1][:])
counter += 1
for index in range(len(lines)):
plt.imshow(lines[index], cmap='gray')
plt.show()
lines_new = []
for line in lines:
vertical_projection = np.sum(line/255 , axis=0)
i=0
j=vertical_projection.shape[0]-1
while i < vertical_projection.shape[0]-1:
if (vertical_projection[i]>0):
#print(i)
break
i += 1
while j > 0:
if (vertical_projection[j]>0):
#print(j)
break
j -= 1
plt.imshow(line[:,i-10:j+10] , cmap='gray')
plt.show()
lines_new.append(line[:,i-10:j+10])
lines = lines_new
for index in range(len(lines)):
plt.imshow(lines[index], cmap='gray')
plt.show()
"""**Character Preprocessing and padding**"""
# crop, resize, and pad a character image to 128x128
def char_preprocess(img):
    # row-wise projection (sum over each row)
    vertical_projection = np.sum(img, axis=1)
    # find the first and last non-zero rows
left_index = 0
right_index = 0
for i in range(len(vertical_projection)):
if (vertical_projection[i] != 0):
left_index = i - 1
break
for j in range(len(vertical_projection)-1,0,-1):
if (vertical_projection[j] != 0):
right_index = j + 1
break
#crop image
img_cropped = img[left_index: right_index,:]
#resize image
normalized_img = cv2.resize(img_cropped, (int(img_cropped.shape[1]*.3), int(img_cropped.shape[0]*.3)), interpolation = cv2.INTER_AREA)
#imgplot = plt.imshow(img_cropped_resized , cmap='gray')
#plt.show()
# pad the border to centre the character
h = normalized_img.shape[0]
w = normalized_img.shape[1]
if h <= 128 and w <= 128:
thickness_h = int((128-h)/2)
thickness_w = int((128-w)/2)
#print(thickness_h,thickness_w,h,w)
normalized_img = cv2.copyMakeBorder(normalized_img, thickness_h, thickness_h, thickness_w,thickness_w, cv2.BORDER_CONSTANT, value = [0,0])
#imgplot = plt.imshow(normalized_img , cmap='gray')
#plt.show()
normalized_img = cv2.resize(normalized_img , (128,128) , interpolation = cv2.INTER_AREA)
return(normalized_img)
"""**Segmenting Each word in the Line**"""
corrected_words = []
for line in lines:
SC_list = []
SC_list.append(0)
#.....For each line processing....
words = []
vertical_projection = np.zeros((1,line.shape[1]))
vertical_projection = np.sum(line/255,axis=0)
#...code for SC/PSC
cont_zeros = 0
even_odd = 0
SC = 0
threshold = 100
for counter in range(line.shape[1]):
if vertical_projection[counter] == 0:
cont_zeros += 1
if cont_zeros > threshold:
SC_list.append(counter-50)
cont_zeros = 0
else:
cont_zeros = 0
SC_list.append(counter)
SC_list= np.ceil(SC_list)
plt.imshow(line , cmap='gray')
plt.show()
#print(SC_list)
#.........................................
counter = 0
limit = len(SC_list)
while counter < limit-1:
iin = int(SC_list[counter])
fin = int(SC_list[counter+1])
words.append(line[:,iin:fin+1])
counter += 1
index = 0
limit = len(words)
while index < limit:
if np.sum(words[index]) == 0:
del words[index]
limit -= 1
index += 1
# for index in range(len(words)):
# plt.imshow(words[index] , cmap='gray')
# plt.show()
#..........................................
#Character Segmentation
for word in words:
tempword = ''
char_projection = np.sum(word/255 , axis=0)
sp = 0
sp_list = [0]
limit = char_projection.shape[0]
even_odd = 0
for index in range(limit):
if char_projection[index] > 0 and even_odd % 2 == 0:
sp = np.mean(np.arange(sp, (index-1)))
sp_list.append(sp)
even_odd += 1
elif char_projection[index] == 0 and even_odd % 2 == 1:
sp = index
even_odd += 1
sp = np.mean(np.arange(sp, (index-1)))
sp_list.append(sp)
sp_list = np.ceil(sp_list)
#print(sp_list)
#..........................................
images = []
counters = 0
l = len(sp_list)
while counters < l-1:
iin = int(sp_list[counters])
fin = int(sp_list[counters+1])
images.append(word[:,iin:fin+1])
counters += 1
#.........
index = 0
limit = len(images)
while index < limit:
if np.sum(images[index]) == 0:
del images[index]
limit -= 1
index += 1
for image in images:
#image = cv2.imread(image, cv2.IMREAD_GRAYSCALE)
#plt.imshow(image,cmap='gray')
#plt.show()
#ret,new_img = cv2.threshold(image,200,255,cv2.THRESH_BINARY)
#test_img = cv2.resize(new_img, (64,64))
test_img = char_preprocess(image)
#plt.imshow(test_img , cmap='gray')
#plt.show()
#plt.imshow(test_img , cmap='gray')
#plt.show()
test = np.zeros((1,128,128,1))
test[0] += test_img.reshape(128,128,1)
pred = model.predict(test)
index = np.argmax(pred)
print(y_train_dict[index] , end = '')
print(' ' , end='')
print('')
#..........................................
from sklearn.metrics import confusion_matrix
index = 0
x_test = []
y_test = []
y_test_dict = {}
for each_class in classes[10:]:
    #print(type(each_class))
    # same letter classes as training
    if each_class not in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']:
        # hold out ten photos per class (indices 1000-1009) for testing
curr_path = root_path + each_class
curr_dir = os.listdir(curr_path)
for image in curr_dir[1000:1010]:
curr_image = curr_path + '/' + image
image = cv2.imread(curr_image, cv2.IMREAD_GRAYSCALE)
ret,new_img = cv2.threshold(image,200,255,cv2.THRESH_BINARY_INV)
new_img = cv2.resize(new_img, (128, 128))
x_test.append(new_img)
y_test.append(index)
y_test_dict[index] = each_class
index += 1
print('Done...')
y_predict_test = []
for each in x_test:
each = cv2.resize(each , (128,128) , interpolation = cv2.INTER_AREA)
test = np.zeros((1,128,128,1))
test[0] += each.reshape(128,128,1)
pred = model.predict(test)
index = np.argmax(pred)
y_predict_test.append(index)
print('out')
cm = confusion_matrix(y_true=y_test, y_pred=y_predict_test)
print(cm)
import seaborn as sns
pplot = sns.heatmap(cm)