Initial commit

98dae565 · Saswat · 98dae565 · 98dae565 · 98dae565 · 98dae565
Commit 98dae565 authored May 05, 2023 by Saswat
29 changed files
--- a/assignment1/README.txt
+++ b/assignment1/README.txt
+Team Members:
+1. Singamsetty Sandeep (213050064)
+2. Saswat Meher (22m0804)
+Requirements:
+Pytorch
+Python3
+Keras
+Transformers
+Datasets
+numpy
+scipy
+sklearn
+tensorflow
+sklearn_crfsuite
+Hardware Requirements:
+>15 GB Ram
+For BERT based model: GPU
+Setup before running:
+Preprocessed data can be downloaded from: https://drive.google.com/drive/folders/13bpmwRF5TV9ssU8eQgwXlvaLvp0nvqvW?usp=sharing
+(folder "additional"). The files are (train/test/dev)_data_preprocessed.json.
+Keep them in the same folder as the code.
+Execution and Reproducing Results:
+CRF:
+Folder: /code/CRF_NER
+File: crf_sklearn.py
+To train on the training data and predict on the test data:
+Run: python crf_sklearn.py
+Make sure the files used in this code, train_data_preprocessed.json etc. exist in the same folder.
+You can also try running CRF.py, which is an extension of Prof. Soumen's code, but it will take more than 6 hours
+for 1 epoch on CPU. It was failing to load on GPU because of its size.
+LSTM:
+Folder: /code/LSTM_NER
+File: lstm_ner.py
+To train call the train() function.
+The test() function is also included to test for one example.
+All the predefined parameters are included. (epochs etc.)
+Make sure the files used in this code, train_data_preprocessed.json etc. exist in the same folder.
+BERT:
+Folder: /code/BERT_NER
+File: bert_ner.py
+To train call the train() function.
+The test() function is also included to test for one example.
+Note: 
+All the predefined parameters are included. (epochs etc.)
+Make sure the files used in this code, train_data_preprocessed.json etc. exist in the same folder.
+Also make sure you have GPU. Also, for seqeval of distilbert-base-uncased, Nvidia libraries are required. Depends on GPU configuration, so not mentioning them here. The model checkpoint used Hugging face API to access the base model, so make sure connection is accessible.
+Erasing Concepts from Diffusion Models (ESD)
+Our project aims to address concerns regarding large-scale diffusion models producing undesirable output such as sexually inappropriate content or copyrighted artistic styles. We analyze a fine-tuning method that can erase specific visual concepts from pre-trained diffusion model weights, given only the name of the style.
+This approach offers several advantages over existing methods. Firstly, we can remove concepts from a diffusion model permanently, rather than modifying the output at inference time. This makes the method more secure and less prone to being circumvented even if a user has access to model weights. Secondly, we want to perform comparative analysis of the ESD method against existing approaches and demonstrate its effectiveness.
+Overall, our project seeks to analyze the current novel method for erasing specific visual concepts from diffusion model weights, addressing concerns about the production of undesirable output.
+Related Papers:
+https://arxiv.org/pdf/2303.07345.pdf
+Ideas from: 
+https://arxiv.org/abs/2207.12598
+https://arxiv.org/abs/2004.06030
+https://arxiv.org/pdf/2211.05105.pdf SLD
+https://proceedings.mlr.press/v162/ravfogel22a.html
+https://arxiv.org/abs/2111.08947
+https://arxiv.org/abs/2007.15646
+https://arxiv.org/abs/2209.02299
+https://arxiv.org/abs/1912.03817
+https://arxiv.org/abs/2104.08164 for LMs
+https://arxiv.org/abs/1911.04933
+https://arxiv.org/abs/2303.05699
+Timeline:
+Week 1:
+Studying ESD paper and its references
+Week 2:
+Training diffusion model on custom constraints
+Week 3:
+Running various experiments, to analyse performance of our model.
+Week 4:
+Documentation of experiments and results, and exploring future works.
--- a/assignment1/Report.pdf
+++ b/assignment1/Report.pdf
--- a/assignment1/additional/evaluation_of_ner.py
+++ b/assignment1/additional/evaluation_of_ner.py
+import json
+def f1(p, r):
+    if r == 0.:
+        return 0.
+    return 2 * p * r / float( p + r )
+def loose_macro(true, pred):
+    num_entities = len(true)
+    p = 0.
+    r = 0.
+    for true_labels, predicted_labels in zip(true, pred):
+        if len(predicted_labels) > 0:
+            p += len(set(predicted_labels).intersection(set(true_labels))) / float(len(predicted_labels))
+        if len(true_labels):
+            r += len(set(predicted_labels).intersection(set(true_labels))) / float(len(true_labels))
+    precision = p / num_entities
+    recall = r / num_entities
+    return precision, recall, f1( precision, recall)
+def loose_micro(true, pred):
+    num_predicted_labels = 0.
+    num_true_labels = 0.
+    num_correct_labels = 0.
+    for true_labels, predicted_labels in zip(true, pred):
+        num_predicted_labels += len(predicted_labels)
+        num_true_labels += len(true_labels)
+        num_correct_labels += len(set(predicted_labels).intersection(set(true_labels))) 
+    if num_predicted_labels > 0:
+        precision = num_correct_labels / num_predicted_labels
+    else:
+        precision = 0.
+    recall = num_correct_labels / num_true_labels
+    return precision, recall, f1( precision, recall)
+with open('test_data_predicted_LSTM_2.json') as f:           #change the file name as required (in /output folder)
+    data = json.load(f)
+predicted_list = []
+for i in range(len(data)):
+    tags_list = data[i]['tags']
+    predicted_list+= tags_list
+with open('test_data_preprocessed.json') as f:              #can be found in /additional folder
+    data = json.load(f)
+true_list = []
+for i in range(len(data)):
+    tags_list = data[i]['tags']
+    true_list+= tags_list
+print(len(predicted_list),len(true_list))
+precision,recall,f1_micro = loose_micro(true_list,predicted_list)
+print(precision,recall,f1_micro)
+precision,recall,f1_macro = loose_macro(true_list,predicted_list)
+print(precision,recall,f1_macro)
+matched_count = 0
+for i in range(len(true_list)):
+    if true_list[i] == predicted_list[i]:
+        matched_count+=1
+print(matched_count)
--- a/assignment1/additional/preprocessing_ner_data.py
+++ b/assignment1/additional/preprocessing_ner_data.py
+import json
+def tag_list_deducer(tag_sub_list):
+	res = max(tag_sub_list,key=len)
+	return res
+with open('train.json') as f:     #change files as required
+	data = json.load(f)
+for i in range(len(data)):
+	tags_list = data[i]['tags']
+	temp_tags_list = []
+	for item in tags_list:
+		if isinstance(item,str):
+			temp_tags_list.append(item)
+		elif isinstance(item,list):
+			temp_tags_list.append(tag_list_deducer(item))
+	data[i]['tags'] = temp_tags_list
+with open('train_data_preprocessed.json',"w") as final:
+	json.dump(data,final,indent=2)
--- a/assignment1/code/BERT_NER/bert_ner.py
+++ b/assignment1/code/BERT_NER/bert_ner.py
+import os
+import itertools
+import pandas as pd
+import numpy as np
+from datasets import Dataset
+from datasets import load_metric
+from transformers import AutoTokenizer
+from transformers import AutoModelForTokenClassification, TrainingArguments, Trainer
+from transformers import DataCollatorForTokenClassification
+import torch
+import json
+with open('train_data_preprocessed.json') as f:
+	data = json.load(f)
+train_sentences, train_tags = [], []
+for i in range(len(data)):
+	tags_list = data[i]['tags']
+	sent_list = data[i]['sent']
+	train_sentences.append(np.array(sent_list))
+	train_tags.append(np.array(tags_list))
+with open('test_data_preprocessed.json') as f:
+	test_data = json.load(f)
+test_sentences, test_tags = [], []
+for i in range(len(test_data)):
+	tags_list = data[i]['tags']
+	sent_list = data[i]['sent']
+	test_sentences.append(np.array(sent_list))
+	test_tags.append(np.array(tags_list))
+words, tags = set([]), set([])
+for s in train_sentences:
+	for w in s:
+		words.add(w)
+for ts in train_tags:
+	for t in ts:
+		tags.add(t)
+label_list = list(tags)
+task = "ner" 
+model_checkpoint = "distilbert-base-uncased"
+batch_size = 16
+tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
+def get_all_tokens_and_ner_tags(name):
+	return pd.concat([get_tokens_and_ner_tags(name)]).reset_index().drop('index', axis=1)
+def get_tokens_and_ner_tags(name):
+	if name=='train': 
+		return pd.DataFrame({'tokens':train_sentences , 'ner_tags':train_tags })
+	if name=='test':
+		return pd.DataFrame({'tokens':test_sentences , 'ner_tags':test_tags })
+def get_dataset():
+	train_df = get_all_tokens_and_ner_tags('train')
+	test_df = get_all_tokens_and_ner_tags('test')
+	train_dataset = Dataset.from_pandas(train_df)
+	test_dataset = Dataset.from_pandas(test_df)
+	return (train_dataset, test_dataset)
+# Build both splits once at import time and print the training split.
+train_dataset, test_dataset = get_dataset()
+print(train_dataset)
+def tokenize_and_align_labels(examples):
+	label_all_tokens = True
+	tokenized_inputs = tokenizer(list(examples["tokens"]), truncation=True, is_split_into_words=True)
+	labels = []
+	for i, label in enumerate(examples[f"{task}_tags"]):
+		word_ids = tokenized_inputs.word_ids(batch_index=i)
+		previous_word_idx = None
+		label_ids = []
+		for word_idx in word_ids:
+			if word_idx is None:
+				label_ids.append(-100)
+			elif label[word_idx] == '0':
+				label_ids.append(0)
+			previous_word_idx = word_idx
+		labels.append(label_ids)
+	tokenized_inputs["labels"] = labels
+	return tokenized_inputs
+# Tokenize both splits in batches (adds the "labels" entry) and print the training split.
+train_tokenized_datasets = train_dataset.map(tokenize_and_align_labels, batched=True)
+test_tokenized_datasets = test_dataset.map(tokenize_and_align_labels, batched=True)
+print(train_tokenized_datasets)
+def train():
+	model = AutoModelForTokenClassification.from_pretrained(model_checkpoint, num_labels=len(label_list))
+	args = TrainingArguments(
+		f"test-{task}",
+		evaluation_strategy = "epoch",
+		learning_rate=1e-4,
+		per_device_train_batch_size=batch_size,
+		per_device_eval_batch_size=batch_size,
+		num_train_epochs=50,
+		weight_decay=1e-5,
+	)
+	data_collator = DataCollatorForTokenClassification(tokenizer)
+	metric = load_metric("seqeval")
+	def compute_metrics(p):
+		predictions, labels = p
+		predictions = np.argmax(predictions, axis=2)
+		true_predictions = [[label_list[p] for (p, l) in zip(prediction, label) if l != -100] for prediction, label in zip(predictions, labels)]
+		true_labels = [[label_list[l] for (p, l) in zip(prediction, label) if l != -100] for prediction, label in zip(predictions, labels)]
+		results = metric.compute(predictions=true_predictions, references=true_labels)
+		return {"precision": results["overall_precision"], "recall": results["overall_recall"], "f1": results["overall_f1"], "accuracy": results["overall_accuracy"]}
+	trainer = Trainer(
+		model,
+		args,
+		train_dataset=train_tokenized_datasets,
+		eval_dataset=test_tokenized_datasets,
+		data_collator=data_collator,
+		tokenizer=tokenizer,
+		compute_metrics=compute_metrics
+	)
+	trainer.train()
+	trainer.evaluate()
+	trainer.save_model('bert.model')
+def test():
+	tokenizer = AutoTokenizer.from_pretrained('./bert.model/')
+	sentence = 'Delhi is capital of India.'
+	tokens = tokenizer(sentence)
+	torch.tensor(tokens['input_ids']).unsqueeze(0).size()
+	model = AutoModelForTokenClassification.from_pretrained('./bert.model/', num_labels=len(label_list))
+	predictions = model.forward(input_ids=torch.tensor(tokens['input_ids']).unsqueeze(0), attention_mask=torch.tensor(tokens['attention_mask']).unsqueeze(0))
+	predictions = torch.argmax(predictions.logits.squeeze(), axis=1)
+	predictions = [label_list[i] for i in preds]
+	words = tokenizer.batch_decode(tokens['input_ids'])
+	print(words)
+	print(predictions)
+#train()
+#test()
\ No newline at end of file
--- a/assignment1/code/CRF_NER/CRF.py
+++ b/assignment1/code/CRF_NER/CRF.py
--- a/assignment1/code/CRF_NER/crf_sklearn.py
+++ b/assignment1/code/CRF_NER/crf_sklearn.py
+import json
+#import torch
+from itertools import chain
+#import nltk
+import sklearn
+from sklearn.model_selection import cross_val_predict, cross_val_score
+import sklearn_crfsuite
+from sklearn_crfsuite import scorers,CRF
+from sklearn_crfsuite.metrics import flat_classification_report
+from sklearn_crfsuite import metrics
+from sklearn.metrics import classification_report
+from tqdm import tqdm
+data_folder = "./"
+train_file = "train_data_preprocessed.json"
+dev_file = "dev_data_preprocessed.json"
+test_file = "test_data_preprocessed.json"
+#train_file = dev_file
+train_data = []
+test_data = []
+with open(data_folder+ train_file) as f:     #change files as required
+    train_data = json.load(f)
+with open(data_folder+ test_file) as f:     #change files as required
+    test_data = json.load(f)
+# Print tag stats and prepare tag dictionary.
+STATE_INIT = 0
+tag_name_to_id = dict()
+tag_name_to_id["init"] = STATE_INIT
+# for i in range(len(data)):
+#     for j in range(len(data[i]["tags"])):
+#         tag = data[i]["tags"][j]
+#         if tag not in tag_name_to_id:
+#             tag_name_to_id[tag] = len(tag_name_to_id)
+NUM_STATES = len(tag_name_to_id)
+#print("Number of states", NUM_STATES)
+def word2features(sent, i):
+    word = sent[i]
+    features = {
+        'bias': 1.0,
+        'word.lower()': word.lower(),
+        'word[-3:]': word[-3:],
+        'word[-2:]': word[-2:],
+        'word.isupper()': word.isupper(),
+        'word.istitle()': word.istitle(),
+        'word.isdigit()': word.isdigit(),
+    }
+    if i > 0:
+        word1 = sent[i-1]
+        features.update({
+            '-1:word.lower()': word1.lower(),
+            '-1:word.istitle()': word1.istitle(),
+            '-1:word.isupper()': word1.isupper(),
+        })
+    else:
+        features['BOS'] = True
+    if i < len(sent)-1:
+        word1 = sent[i+1]
+        features.update({
+            '+1:word.lower()': word1.lower(),
+            '+1:word.istitle()': word1.istitle(),
+            '+1:word.isupper()': word1.isupper(),
+        })
+    else:
+        features['EOS'] = True
+    return features
+def sent2features(sent):
+    return [word2features(sent["sent"], i) for i in range(len(sent["sent"]))]
+def sent2labels(sent):
+    """Return the tag sequence stored under ``sent["tags"]``."""
+    return sent["tags"]
+def gen_data():
+    X_train = []
+    y_train = []
+    X_test = []
+    y_test = []
+    for sentence in tqdm(train_data, mininterval=1):
+        X_train.append(sent2features(sentence))
+        y_train.append(sentence["tags"])
+    for sentence in tqdm(test_data):
+        X_test.append(sent2features(sentence))
+        y_test.append(sentence["tags"])
+    return X_train,y_train, X_test, y_test
+X_train, y_train, X_test, y_test = gen_data()
+print(len(X_train))
+print(len(y_train))
+print(len(X_test))
+print(len(y_test))
+#Creating the CRF model
+crf = CRF(algorithm='lbfgs',
+          c1=0.25,
+          c2=0.3,
+          max_iterations=5,
+          all_possible_transitions=True,
+          verbose = True)
+crf.fit(X_train,y_train)
+y_pred_data = crf.predict(X_test)
+y_true = []
+y_pred = []
+for i in range(len(y_test)):
+    if (len(y_pred_data[i]) != len(y_test[i])): 
+        print(i,"pred: ",len(y_pred_data[i]))
+        print(i,"test: ",len(y_test[i]))
+        print()
+    y_true.extend(y_test[i])
+    y_pred.extend(y_pred_data[i])
+print(len(y_true))
+print(len(y_pred))
+report = classification_report(y_true, y_pred)
+print("Writing report into: report.txt")
+with open('report.txt', 'w') as f:
+    print(report, file=f)
+print(report)
+for i in tqdm(range(len(test_data))):
+    test_data[i]["tags"] = y_pred_data[i]
+print("Writing into output into: ", "pred_"+test_file)
+with open("test_data_predicted_CRF.json", 'w') as fp:
+    json.dump(test_data, fp)
--- a/assignment1/code/LSTM_NER/lstm_ner.py
+++ b/assignment1/code/LSTM_NER/lstm_ner.py
+import numpy as np
+import json
+with open('train_data_preprocessed.json') as f:
+	data = json.load(f)
+with open('test_data_preprocessed.json') as f:
+	test_data = json.load(f)
+train_sentences, train_tags = [], []
+for i in range(len(data)):
+	tags_list = data[i]['tags']
+	sent_list = data[i]['sent']
+	train_sentences.append(np.array(sent_list))
+	train_tags.append(np.array(tags_list))
+test_sentences, test_tags = [], []
+for i in range(len(test_data)):
+	tags_list = data[i]['tags']
+	sent_list = data[i]['sent']
+	test_sentences.append(np.array(sent_list))
+	test_tags.append(np.array(tags_list))
+words, tags = set([]), set([])
+for s in train_sentences:
+	for w in s:
+		words.add(w)
+for ts in train_tags:
+	for t in ts:
+		tags.add(t)
+print(tags)
+word2index = {w: i + 2 for i, w in enumerate(list(words))}
+word2index['-PAD-'] = 0  # The special value used for padding
+word2index['-OOV-'] = 1  # The special value used for OOVs
+tag2index = {t: i + 1 for i, t in enumerate(list(tags))}
+tag2index['-PAD-'] = 0  # The special value used to padding
+train_sentences_X, test_sentences_X, train_tags_y, test_tags_y = [], [], [], []
+for s in train_sentences:
+	s_int = []
+	for w in s:
+		try:
+			s_int.append(word2index[w])
+		except KeyError:
+			s_int.append(word2index['-OOV-'])
+	train_sentences_X.append(s_int)
+for s in test_sentences:
+	s_int = []
+	for w in s:
+		try:
+			s_int.append(word2index[w])
+		except KeyError:
+			s_int.append(word2index['-OOV-'])
+	test_sentences_X.append(s_int)
+for s in train_tags:
+	train_tags_y.append([tag2index[t] for t in s])
+for s in test_tags:
+	test_tags_y.append([tag2index[t] for t in s])
+print(train_sentences_X[0])
+print(test_sentences_X[0])
+print(train_tags_y[0])
+print(test_tags_y[0])
+MAX_LENGTH = len(max(train_sentences_X, key=len))
+print(MAX_LENGTH)
+from keras.preprocessing.sequence import pad_sequences
+train_sentences_X = pad_sequences(train_sentences_X, maxlen=MAX_LENGTH, padding='post')
+test_sentences_X = pad_sequences(test_sentences_X, maxlen=MAX_LENGTH, padding='post')
+train_tags_y = pad_sequences(train_tags_y, maxlen=MAX_LENGTH, padding='post')
+test_tags_y = pad_sequences(test_tags_y, maxlen=MAX_LENGTH, padding='post')
+print(train_sentences_X[0])
+print(test_sentences_X[0])
+print(train_tags_y[0])
+print(test_tags_y[0])
+from keras.models import Sequential
+from keras.layers import Dense, LSTM, InputLayer, Bidirectional, TimeDistributed, Embedding, Activation
+from tensorflow.keras.optimizers import Adam
+def to_categorical(sequences, categories):
+	cat_sequences = []
+	for s in sequences:
+		cats = []
+		for item in s:
+			cats.append(np.zeros(categories))
+			cats[-1][item] = 1.0
+		cat_sequences.append(cats)
+	return np.array(cat_sequences)
+def train():
+	model = Sequential()
+	model.add(InputLayer(input_shape=(MAX_LENGTH, )))
+	model.add(Embedding(len(word2index), 128))
+	model.add(Bidirectional(LSTM(256, return_sequences=True)))
+	model.add(TimeDistributed(Dense(len(tag2index))))
+	model.add(Activation('softmax'))
+	model.compile(loss='categorical_crossentropy',
+				  optimizer=Adam(0.001),
+				  metrics=['accuracy'])
+	model.summary()
+	cat_train_tags_y = to_categorical(train_tags_y, len(tag2index))
+	print(cat_train_tags_y[0])
+	model.fit(train_sentences_X, to_categorical(train_tags_y, len(tag2index)), batch_size=128, epochs=200, validation_split=0.2)
+	scores = model.evaluate(test_sentences_X, to_categorical(test_tags_y, len(tag2index)))
+	print(f"{model.metrics_names[1]}: {scores[1] * 100}")   # acc: 99.09751977804825
+	model.save('lstm_model') 
+def logits_to_tokens(sequences, index):
+	token_sequences = []
+	for categorical_sequence in sequences:
+		token_sequence = []
+		for categorical in categorical_sequence:
+			token_sequence.append(index[np.argmax(categorical)])
+		token_sequences.append(token_sequence)
+	return token_sequences
+from tensorflow import keras
+def test():
+	model = keras.models.load_model('lstm_model')
+	test_samples = [
+		"Running is very important for IIT Bombay students.".split()
+	]
+	print(test_samples)
+	test_samples_X = []
+	for s in test_samples:
+		s_int = []
+		for w in s:
+			try:
+				s_int.append(word2index[w.lower()])
+			except KeyError:
+				s_int.append(word2index['-OOV-'])
+		test_samples_X.append(s_int)
+	test_samples_X = pad_sequences(test_samples_X, maxlen=MAX_LENGTH, padding='post')
+	print(test_samples_X)
+	predictions = model.predict(test_samples_X)
+	print(predictions, predictions.shape)
+	print(logits_to_tokens(predictions, {i: t for t, i in tag2index.items()}))
+#train()
+#test()
--- a/assignment1/output/test_data_predicted_BERT.json
+++ b/assignment1/output/test_data_predicted_BERT.json
--- a/assignment1/output/test_data_predicted_CRF.json
+++ b/assignment1/output/test_data_predicted_CRF.json
--- a/assignment1/output/test_data_predicted_LSTM_2.json
+++ b/assignment1/output/test_data_predicted_LSTM_2.json
--- a/assignment2/README.md
+++ b/assignment2/README.md
+# CS728 Assignment 2
+**Singamsetty Sandeep (213050064)**
+**Saswat Meher (22m0804)**
+This file contains instruction on how to run the code and structure of the code.
+## Requirement
+* Execute all the commands from inside the submit folder.
+Install required libraries by running the following command,
+```bash
+pip install -r requirements.txt        # for pip
+conda install --file requirements.txt  # for conda
+```
+## Folder Structure
+submit/
+>code/
+>>Q1.py // Contains both the model architecture, training and testing for Q1
+>>DTW.py // Contains the Model for DTW non_crossing and crossing
+>>Q3.py // Code for Training and testing of DTW models.
+>>dataset.py // Dataset class for Glue dataset.
+>output/
+>> Q1_test.txt // Finetuned model predicted output for Test dataset of GLUE.
+>> Q1_val.txt // Finetuned model predicted output for Validation dataset of GLUE.
+>> Q3_non_crossing_test.txt // DTW non Crossing model predicted output for Test dataset of GLUE.
+>> Q3_non_crossing_val.txt // DTW non Crossing model predicted output for Val dataset of GLUE.
+>> Q3_crossing_test.txt // DTW Crossing model predicted output for Test dataset of GLUE.
+>> Q3_crossing_val.txt // DTW Crossing model predicted output for Val dataset of GLUE.
+>additional_files/
+>>models/ // Contains save parameters for all the model (Q1,Q2,Q3).
+>README.md
+>Requirements.txt
+>Report.pdf
+## Method to reproduce the result and test the model
+### Q1
+#### Training
+To fine-tune the BERT-Tiny model on the STS task, use the following command.
+* Note this will overwrite the existing model params in additional folder. To only test, use the commands in the next segment.
+```bash
+python3 code/Q1.py -t
+```
+This will also generate results for validation and test dataset in "*outputs/*" directory
+#### Inference
+To test/infer the model use the following command:
+```bash
+python3 code/Q1.py -i
+```
+This will prompt the user to enter sentence 1 and sentence 2 in turn, and it will then show the correlation score.
+### Q3
+#### Training
+To train a,b of tanh in DTW with **non crossing** constraint, run the following command.
+* Note this will overwrite the existing model params in additional folder. To only test, use the commands in the next segment.
+```bash
+python3 code/Q3.py -t
+```
+To train a,b of tanh in DTW with **crossing** constraint, add another arg "-c" to the above command.
+```bash
+python3 code/Q3.py -c -t
+```
+This will also generate results for validation and test dataset in "*outputs/*" directory with the names of the files as *{DTW_{non_crossing/crossing}_{val/test}.txt}*
+#### Inference
+To test/infer the model use the following command:
+```bash
+python3 code/Q3.py -i
+```
+Similar to training add "-c" for inference using crossing in above command.
+This will also give the user a prompt to enter sentence 1 and sentence 2 respectively and It will show the Correlation score and a mapping between the tokens of smaller sentence with the larger sentence.
--- a/assignment2/Report.pdf
+++ b/assignment2/Report.pdf
--- a/assignment2/additional/models/DTW_crossing.pt
+++ b/assignment2/additional/models/DTW_crossing.pt
--- a/assignment2/additional/models/DTW_non_crossing.pt
+++ b/assignment2/additional/models/DTW_non_crossing.pt
--- a/assignment2/additional/models/all_to_all_bert.pt
+++ b/assignment2/additional/models/all_to_all_bert.pt
--- a/assignment2/code/DTW.py
+++ b/assignment2/code/DTW.py
+import torch
+from transformers import BertModel
+class DTW(torch.nn.Module):
+    """
+    Model that uses DTW to check similarity between two sequence of sentence.
+    """
+    def __init__(self,pre_trained_model_name, crossing = True):
+        super(DTW, self).__init__()
+        self.crossing = crossing
+        self.bert_model = BertModel.from_pretrained(pre_trained_model_name, return_dict=False)
+        for param in self.bert_model.parameters():
+            param.requires_grad = False
+        self.cos = torch.nn.CosineSimilarity(dim = -1)
+        self.a = torch.nn.Parameter(torch.rand(1, requires_grad = True, dtype = torch.float))
+        self.b = torch.nn.Parameter(torch.rand(1, requires_grad = True, dtype = torch.float))
+        self.tanh = torch.nn.Tanh()
+    def forward(self, input_ids_1 = None, attention_mask_1 = None,
+                        token_type_ids_1 = None, input_ids_2 = None,
+                        attention_mask_2 = None, token_type_ids_2 = None, test = False):
+        """
+        Forward of this model that takes sentence as separate input and predict the similarity score using DTW.
+        """
+        output1 = self.bert_model(input_ids = input_ids_1, attention_mask = attention_mask_1, token_type_ids = token_type_ids_1)[0]
+        output2 = self.bert_model(input_ids = input_ids_2, attention_mask = attention_mask_2, token_type_ids = token_type_ids_2)[0]
+        sim_scores = []
+        for i in range(len(output1)):
+            sim_score = self.get_DTW_score(output1[i][attention_mask_1[i] == 1][1:-1], output2[i][attention_mask_2[i] == 1][1:-1],
+                                           return_map = False, crossing = self.crossing)
+            sim_scores.append(sim_score)   
+        return torch.cat(sim_scores)
+    def score(self,s1_i, s2_j, eps=1e-8):
+        """
+        Given two list of word embeddings find the cosine similarity between every pair of tokens in both sentence.
+        """
+        a_n, b_n = s1_i.norm(dim=1)[:, None], s2_j.norm(dim=1)[:, None]
+        a_norm = s1_i / torch.clamp(a_n, min=eps)
+        b_norm = s2_j / torch.clamp(b_n, min=eps)
+        sim_mt = torch.mm(a_norm, b_norm.transpose(0, 1))
+        return self.tanh(sim_mt * self.a + self.b)
+    def get_DTW_score(self, s1, s2, return_map = False, crossing = False):
+        """
+        Method that helps in getting DTW similarity score given embeddings of tokens in sentences.
+        """
+        if len(s1) < len(s2):
+            s1, s2 = s2, s1
+        I = len(s1)
+        J = len(s2)
+        sim_mat = self.score(s1, s2)
+        if crossing:
+            sim = torch.sum(torch.max(sim_mat, dim = 0)[0]) / J
+            if return_map:
+                k = torch.argmax(sim_mat,dim=0)
+                return sim.reshape(1), k
+            return sim.reshape(1)
+        else:
+            M = sim_mat > 0
+            P = torch.zeros((I,J))
+            K = torch.zeros((I,J), dtype = torch.int)
+            for i in range(I):
+                P[i][0] = sim_mat[i][0]
+            K[:,0] = -1
+            for j in range(1, J):
+                max_val = float('-inf')
+                ptr = None
+                P[0][j] = max(0, sim_mat[0][j])
+                for i in range(1,I):
+                    if max_val < P[i-1][j-1]:
+                        max_val = P[i-1][j-1]
+                        ptr = i-1
+                    P[i][j] = max_val + max(0, sim_mat[i][j])
+                    K[i][j] = ptr
+            # print("sim_mat")
+            # print(sim_mat)
+            # print("P")
+            # print(P)
+            # print("M")
+            # print(M)
+            # print("K")
+            # print(K)
+            if return_map:
+                m = [None] * J
+                k = [None] * J
+                I_prime = int(torch.argmax(P,dim=0)[J-1])
+                m[-1] = I_prime
+                K[I_prime][0] = I_prime
+                if M[I_prime][J-1]:
+                    k[-1] = m[-1]
+                else:
+                    k[-1] = None
+                for j in range(J-2, -1, -1):
+                    m[j] = K[m[j+1]][j+1]
+                    if m[j] == -1 or m[j] == None:
+                        break
+                    if M[m[j]][j]:
+                        k[j] = m[j].item()
+                    else:
+                        k[j] = None
+                return P[I-1][J-1].reshape(1), k
+            return P[I-1][J-1].reshape(1) / J
--- a/assignment2/code/Q1.py
+++ b/assignment2/code/Q1.py
+import torch
+from transformers import BertModel, AutoTokenizer
+from torch.utils.data import DataLoader
+from transformers import AdamW
+from tqdm import tqdm
+from dataset import *
+from datasets import load_dataset, load_metric
+from functools import partial
+import os
+import argparse
+class STSBERTModel(torch.nn.Module):
+  """
+  Bert-tiny Model with a linear layer at the end to predict the value of similarity score
+  """
+  def __init__(self, pre_trained_model_name):
+    """
+    Init a bert model and a linear layer with a dropout layer in between
+    """
+    super(STSBERTModel, self).__init__()
+    self.bert_model = BertModel.from_pretrained(pre_trained_model_name, return_dict=False)
+    self.bert_drop = torch.nn.Dropout(0.3)
+    self.fnn = torch.nn.Linear(128, 1)
+  def forward(self, input_ids, attention_mask, token_type_ids):
+    """
+    Pass the input sent1 and sent2 together to bert model and use the pooled_output from it to predict similarity score
+    """
+    _, pooled_output = self.bert_model(input_ids = input_ids, attention_mask = attention_mask, token_type_ids = token_type_ids)
+    output = self.fnn(self.bert_drop(pooled_output))
+    return output
+def all_to_all_collate_fn(data, tokenizer):
+    """
+    A helper function to create a batch of encoded sentence so that it can be passed to the bert model
+    """
+    input_sents = [i[0] for i in data]
+    labels = [i[1] for i in data]
+    input_encoded = tokenizer.batch_encode_plus(input_sents, padding="max_length", max_length = 32, truncation=True, return_tensors='pt')
+    return input_encoded, labels
+class STSBERTModelTrainer:
+    """
+    Helper that fine-tunes an STSBERTModel for the STS-B similarity task.
+    It bundles the model, tokenizer, optimiser and GLUE metric and exposes
+    training, evaluation, checkpointing and interactive inference.
+    """
+    # File name used by save_model()/load_model() inside the given directory.
+    CHECKPOINT_NAME = "all_to_all_bert.pt"
+    def __init__(self,pre_trained_model_name, device):
+        """
+        Initialise the STSBERTModel along with its tokenizer, optimiser and metric.
+        pre_trained_model_name: checkpoint name given to STSBERTModel and AutoTokenizer.
+        device: torch device the model and every batch are moved to.
+        """
+        self.task = "stsb"
+        self.device = device
+        self.model = STSBERTModel(pre_trained_model_name).to(device)
+        self.tokenizer = AutoTokenizer.from_pretrained(pre_trained_model_name)
+        # Learning rate for the phase in which BERT is frozen; train() lowers it on unfreezing.
+        self.optimizer = AdamW(self.model.parameters(), lr=1e-3)
+        self.metric = load_metric('glue', self.task)
+    def freeze_bert(self,_freeze=True):
+        """
+        Freeze (default) or unfreeze the parameters of self.model.bert_model.
+        """
+        for param in self.model.bert_model.parameters():
+            param.requires_grad = not _freeze
+    def _checkpoint_path(self, model_dir):
+        """
+        Return the checkpoint file path inside model_dir.
+        """
+        # os.path.join works whether or not model_dir ends with a path separator.
+        return os.path.join(model_dir, self.CHECKPOINT_NAME)
+    def _run_batch(self, inputs, labels):
+        """
+        Move one collated batch to the device and run the model on it.
+        Returns (outputs, labels, loss); loss is the MSE between outputs and labels.
+        """
+        # as_tensor accepts lists as well as tensors and does not re-copy tensors that already exist.
+        input_ids = torch.as_tensor(inputs['input_ids']).to(self.device)
+        input_attention_mask = torch.as_tensor(inputs['attention_mask']).to(self.device)
+        input_token_type_ids = torch.as_tensor(inputs['token_type_ids']).to(self.device)
+        # Force float32 so that integer-valued labels can still be used with mse_loss.
+        labels = torch.as_tensor(labels, dtype=torch.float32).to(self.device)
+        outputs = self.model(input_ids = input_ids, attention_mask = input_attention_mask, token_type_ids = input_token_type_ids)
+        loss = torch.nn.functional.mse_loss(outputs, labels.view(-1, 1))
+        return outputs, labels, loss
+    def train(self, train_loader, val_loader, EPOCH, fz_bert_epoch):
+        """
+        Train the model for EPOCH epochs and evaluate on val_loader after each one.
+        For the first fz_bert_epoch epochs the BERT parameters stay frozen, so only
+        the remaining parameters are updated; from that epoch on BERT is unfrozen
+        and the learning rate is lowered to 1e-5.
+        """
+        self.freeze_bert()
+        for epoch in range(EPOCH):
+            print("Epoch ", epoch, ":")
+            if epoch == fz_bert_epoch:
+                self.freeze_bert(_freeze = False)
+                for g in self.optimizer.param_groups:
+                    g['lr'] = 1e-5
+            # eval() at the end of the previous epoch switched the model to eval mode.
+            self.model.train()
+            train_loss = 0
+            for inputs, labels in tqdm(train_loader):
+                self.optimizer.zero_grad()
+                _, _, loss = self._run_batch(inputs, labels)
+                loss.backward()
+                self.optimizer.step()
+                train_loss += loss.item()
+            train_loss /= len(train_loader)
+            print("Train Loss:", train_loss)
+            self.eval(val_loader)
+    def eval(self, val_loader, save_file = None):
+        """
+        Evaluate the model on val_loader and print the loss and the similarity metric.
+        save_file: optional path; when given, one prediction per line is written to it.
+        Returns (val_loss, (targets, predictions)) with both lists holding plain floats.
+        """
+        self.model.eval()
+        val_loss = 0
+        predictions = []
+        targets = []
+        with torch.no_grad():
+            for inputs, labels in val_loader:
+                outputs, labels, loss = self._run_batch(inputs, labels)
+                val_loss += loss.item()
+                # reshape(-1) stays 1-D for a batch of one; squeeze() would yield a
+                # scalar whose tolist() is a float that extend() cannot consume.
+                predictions.extend(outputs.reshape(-1).tolist())
+                # Store plain floats rather than 0-d device tensors.
+                targets.extend(labels.reshape(-1).tolist())
+        val_loss /= len(val_loader)
+        print("Val Loss:", val_loss)
+        print(predictions)
+        print(targets)
+        print("Sim Metric:", self.metric.compute(predictions=predictions, references=targets))
+        if save_file:
+            print("Saving outputs in ", save_file)
+            out_dir = os.path.dirname(save_file)
+            if out_dir:
+                os.makedirs(out_dir, exist_ok=True)
+            with open(save_file,'w') as ofile:
+                ofile.writelines(str(pred)+"\n" for pred in predictions)
+        return val_loss, (targets, predictions)
+    def save_model(self, model_dir = None):
+        """
+        Save the model's state dict into model_dir, creating the directory if needed.
+        """
+        os.makedirs(model_dir, exist_ok=True)
+        file_name = self._checkpoint_path(model_dir)
+        print("saving model into ", file_name)
+        torch.save(self.model.state_dict(), file_name)
+    def load_model(self, model_dir = None):
+        """
+        Load the model's state dict from the checkpoint inside model_dir.
+        """
+        file_name = self._checkpoint_path(model_dir)
+        print("Loading from ", file_name)
+        # map_location lets a checkpoint saved on another device load onto this one.
+        self.model.load_state_dict(torch.load(file_name, map_location=self.device))
+    def test_sentence(self):
+        """
+        Interactively read sentence pairs from stdin and print the model's similarity score.
+        The loop ends cleanly on Ctrl+C or end of input.
+        """
+        print("Press Ctrl+C to end this prompt:")
+        self.model.eval()
+        while True:
+            try:
+                sent1 = str(input("Enter Sentence 1: "))
+                sent2 = str(input("Enter Sentence 2: "))
+            except (KeyboardInterrupt, EOFError):
+                print()
+                return
+            print("Sent1: ",sent1)
+            print("Sent2: ",sent2)
+            # Pass the pair as text / text_pair so the tokenizer encodes it as one two-segment input.
+            encodded_sent = self.tokenizer.encode_plus(sent1, sent2, return_tensors='pt')
+            print(encodded_sent)
+            with torch.no_grad():
+                # The inputs must live on the same device as the model.
+                sim_score = self.model(input_ids = encodded_sent['input_ids'].to(self.device), attention_mask = encodded_sent['attention_mask'].to(self.device), token_type_ids = encodded_sent['token_type_ids'].to(self.device))
+                print("Sim Score:", sim_score)
+def main():
+    """
+    Command line entry point: parse the flags, build the trainer and run the requested modes.
+    """
+    arg_parser = argparse.ArgumentParser(description="STS using BERT-Tiny",
+                                 formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    arg_parser.add_argument("-t", "--train", action='store_true', help="Whether to train the model")
+    arg_parser.add_argument("-i", "--infer", action='store_true', help="Whether to infer using command line")
+    arg_parser.add_argument("-e", "--epoch", default=30, help="number of epoch to train")
+    arg_parser.add_argument("-b", "--batchsz", default=16, help="Size of the batch to use while training")
+    options = arg_parser.parse_args()
+    # Values given on the command line arrive as strings, hence the explicit conversion.
+    BATCH_SIZE = int(options.batchsz)
+    EPOCH = int(options.epoch)
+    train = options.train
+    infer = options.infer
+    task = "stsb"
+    fz_bert_epoch = 10
+    seed = 42
+    pre_trained_model_name = "prajjwal1/bert-tiny"
+    model_dir = "./additional/models/"
+    torch.manual_seed(seed)
+    if torch.cuda.is_available():
+        device = torch.device('cuda')
+    else:
+        device = torch.device('cpu')
+    print("Using device: ",device)
+    trainer = STSBERTModelTrainer(pre_trained_model_name, device)
+    if train:
+        print("Finetuning the Model")
+        dataset = load_dataset("glue", task)
+        train_dataset = STSDataset(dataset["train"])
+        val_dataset = STSDataset(dataset["validation"])
+        test_dataset = STSDataset(dataset["test"])
+        # One collate function bound to the trainer's tokenizer, shared by all loaders.
+        collate = partial(all_to_all_collate_fn, tokenizer = trainer.tokenizer)
+        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn = collate)
+        val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, collate_fn = collate)
+        test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, collate_fn = collate)
+        # NOTE: training and validation scoring are disabled below; this branch
+        # currently reloads the saved checkpoint and only scores the test split.
+        trainer.load_model(model_dir=model_dir)
+        #trainer.train(train_loader, val_loader, EPOCH, fz_bert_epoch)
+        #print("Testing for Validation dataset")
+        #trainer.eval(val_loader, "./output/Q1_val.txt")
+        print("Testing for Test dataset")
+        trainer.eval(test_loader, "./output/Q1_test.txt")
+        trainer.save_model(model_dir)
+    if infer:
+        print("Infering from the model")
+        trainer.load_model(model_dir=model_dir)
+        trainer.test_sentence()
+if __name__ == "__main__":
+    main()
\ No newline at end of file
--- a/assignment2/code/Q3.py
+++ b/assignment2/code/Q3.py
--- a/assignment2/code/dataset.py
+++ b/assignment2/code/dataset.py
+import torch
+class STSDataset(torch.utils.data.Dataset):
+    """
+    Dataset wrapper that yields one sentence pair and its label per index.
+    data: indexable collection of rows with 'sentence1', 'sentence2' and 'label' keys.
+    """
+    def __init__(self, data):
+        self.data = data
+    def __getitem__(self, index):
+        """
+        Return ((sentence1, sentence2), label) for the row at index.
+        """
+        # Fetch the row once; each lookup on the backing dataset may be costly.
+        row = self.data[index]
+        return (row['sentence1'], row['sentence2']), row['label']
+    def __len__(self):
+        """
+        Return the number of rows in the wrapped data.
+        """
+        return len(self.data)
--- a/assignment2/output/Q1_test.txt
+++ b/assignment2/output/Q1_test.txt
--- a/assignment2/output/Q1_val.txt
+++ b/assignment2/output/Q1_val.txt
--- a/assignment2/output/Q3_1_val.txt
+++ b/assignment2/output/Q3_1_val.txt
--- a/assignment2/output/Q3_crossing_test.txt
+++ b/assignment2/output/Q3_crossing_test.txt
--- a/assignment2/output/Q3_crossing_val.txt
+++ b/assignment2/output/Q3_crossing_val.txt
--- a/assignment2/output/Q3_non_crossing_test.txt
+++ b/assignment2/output/Q3_non_crossing_test.txt
--- a/assignment2/output/Q3_non_crossing_val.txt
+++ b/assignment2/output/Q3_non_crossing_val.txt
--- a/assignment2/report.odt
+++ b/assignment2/report.odt
--- a/assignment2/requirements.txt
+++ b/assignment2/requirements.txt
+datasets==2.10.1
+torch==1.13.1
+torch==1.12.1
+tqdm==4.65.0
+transformers==4.24.0
+transformers==4.27.0.dev0
+transformers.egg==info