Commit a3502f7f authored by Dishank

New changes are to be uploaded

parent 2ead04fd
...@@ -8,6 +8,13 @@ from filling.ilm.infer import infill_with_ilm ...@@ -8,6 +8,13 @@ from filling.ilm.infer import infill_with_ilm
from nltk.corpus import wordnet as wn from nltk.corpus import wordnet as wn
from nltk.corpus import stopwords from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize from nltk.tokenize import word_tokenize, sent_tokenize
from transformers import AutoModelForMaskedLM, AutoTokenizer
import numpy as np
# Masked language model and matching tokenizer, loaded once at import time.
# They are passed to score() to rate each generated infill and are separate
# from the GPT-2 `model`/`tokenizer` that produce the infills.
# NOTE(review): the checkpoint name suggests a Russian-focused BERT, while this
# module loads English stop words -- confirm it is the intended scorer.
model_name = 'cointegrated/rubert-tiny'
model1 = AutoModelForMaskedLM.from_pretrained(model_name)
tokenizer1 = AutoTokenizer.from_pretrained(model_name)
stop_words = set(stopwords.words('english')) stop_words = set(stopwords.words('english'))
MODEL_DIR = None MODEL_DIR = None
...@@ -33,11 +40,31 @@ model = GPT2LMHeadModel.from_pretrained(MODEL_DIR) ...@@ -33,11 +40,31 @@ model = GPT2LMHeadModel.from_pretrained(MODEL_DIR)
model.eval() model.eval()
_ = model.to(device) _ = model.to(device)
def hello(str):
def score(model, tokenizer, sentence):
    """Return the pseudo-perplexity of ``sentence`` under a masked LM.

    Every position except the first and the last is masked in turn (one
    masked copy of the encoded sentence per row), the model is asked to
    predict the masked token, and ``exp`` of the resulting loss is returned.

    Args:
        model: Callable invoked as ``model(input_ids, labels=labels)`` whose
            return value exposes a ``.loss`` tensor.
        tokenizer: Object providing ``encode(sentence, return_tensors='pt')``
            and a ``mask_token_id`` attribute.
        sentence: Text to score.

    Returns:
        ``exp(loss)`` for the masked batch, or ``float('inf')`` when the
        encoded sentence has no position that can be masked.
    """
    tensor_input = tokenizer.encode(sentence, return_tensors='pt')
    # The first and last positions are never masked (presumably the
    # tokenizer's special start/end tokens -- confirm for the model in use).
    num_maskable = tensor_input.size(-1) - 2
    if num_maskable < 1:
        # Nothing to mask: an empty batch would give the model no targets.
        return float('inf')
    repeat_input = tensor_input.repeat(num_maskable, 1)
    # Super-diagonal of ones with the last two rows dropped: row i masks
    # column i + 1.
    mask = torch.ones(tensor_input.size(-1) - 1).diag(1)[:-2]
    masked_input = repeat_input.masked_fill(mask == 1, tokenizer.mask_token_id)
    # -100 marks positions that the loss should ignore; only the masked
    # token in each row keeps its original id as the target.
    labels = repeat_input.masked_fill(
        masked_input != tokenizer.mask_token_id, -100)
    with torch.inference_mode():
        loss = model(masked_input, labels=labels).loss
    return np.exp(loss.item())
def hello(str , label_value):
modified_context = str modified_context = str
modified_context = modified_context.replace(' _',"") modified_context = modified_context.replace(' _',"")
tokenized = sent_tokenize(modified_context) tokenized = sent_tokenize(modified_context)
word_list=[] word_list=[]
infill_type=""
if label_value==0:
infill_type = "<|infill_word|>"
elif label_value==1:
infill_type = '<|infill_ngram|>'
elif label_value ==2:
infill_type = '<|infill_sentence|>'
for i in tokenized: for i in tokenized:
# Word tokenizers is used to find the words # Word tokenizers is used to find the words
...@@ -66,14 +93,64 @@ def hello(str): ...@@ -66,14 +93,64 @@ def hello(str):
# print(str) # print(str)
context_ids = filling.ilm.tokenize_util.encode(str, tokenizer) context_ids = filling.ilm.tokenize_util.encode(str, tokenizer)
_blank_id = filling.ilm.tokenize_util.encode(' _', tokenizer)[0] _blank_id = filling.ilm.tokenize_util.encode(' _', tokenizer)[0]
context_ids[context_ids.index(_blank_id)] = additional_tokens_to_ids['<|infill_ngram|>'] no_infill = str.count(' _')
for i in range(no_infill):
print(i)
i+=1
context_ids[context_ids.index(_blank_id)] = additional_tokens_to_ids[infill_type]
print(filling.ilm.tokenize_util.decode(context_ids,tokenizer)) print(filling.ilm.tokenize_util.decode(context_ids,tokenizer))
generated = infill_with_ilm( generated = infill_with_ilm(
model, model,
additional_tokens_to_ids, additional_tokens_to_ids,
context_ids, context_ids,
num_infills=1) num_infills=3)
result=[]
for g in generated:
if length:
temp = filling.ilm.tokenize_util.decode(g,tokenizer)[:-length]
else:
temp = filling.ilm.tokenize_util.decode(g,tokenizer)
print(temp)
result.append(temp)
print(score(model1, tokenizer1, sentence=temp))
score1=score(model1, tokenizer1, sentence=temp)
print(type(score1))
# temp2= ""+score
# result+= temp2+"\n\n"
# print(result)
return result
def old(str, label_value):
    """Infill the first blank in ``str`` and return the decoded candidates.

    The text is encoded with the module-level ``tokenizer``, the first
    occurrence of the blank token (`` _``) is replaced by the infill-type
    token selected by ``label_value``, and three infills are requested from
    ``infill_with_ilm``. Each decoded candidate is printed together with its
    ``score()`` value and the type of that value.

    Args:
        str: Context containing a `` _`` blank. The name shadows the builtin
            but is kept because it is part of the existing signature.
        label_value: 0 for word, 1 for n-gram, 2 for sentence infilling. Any
            other value selects the empty string as the infill type.

    Returns:
        List of decoded candidate strings, one per generated infill.

    Raises:
        ValueError: If the blank token id is absent from the encoded context
            (assumes ``encode`` returns a list -- TODO confirm).
    """
    infill_type = {
        0: "<|infill_word|>",
        1: '<|infill_ngram|>',
        2: '<|infill_sentence|>',
    }.get(label_value, "")

    context_ids = filling.ilm.tokenize_util.encode(str, tokenizer)
    _blank_id = filling.ilm.tokenize_util.encode(' _', tokenizer)[0]
    # Only the first blank is swapped for the infill token.
    context_ids[context_ids.index(_blank_id)] = additional_tokens_to_ids[infill_type]
    print(filling.ilm.tokenize_util.decode(context_ids, tokenizer))

    generated = infill_with_ilm(
        model,
        additional_tokens_to_ids,
        context_ids,
        num_infills=3)

    result = []
    for g in generated:
        temp = filling.ilm.tokenize_util.decode(g, tokenizer)
        print(temp)
        result.append(temp)
        # Score once and reuse the value: each call is a full masked-LM
        # forward pass over the candidate.
        score1 = score(model1, tokenizer1, sentence=temp)
        print(score1)
        print(type(score1))
    return result
\ No newline at end of file
...@@ -10,10 +10,17 @@ def home(request): ...@@ -10,10 +10,17 @@ def home(request):
def result(request):
    """Run infilling on the submitted form and render the candidates.

    Reads ``input`` and ``type`` from the POST data. Input that starts with
    a space is routed to ``ul.old``; everything else goes to ``ul.hello``.
    The chosen function's return value is passed to ``home.html`` as
    ``result`` and the raw input as ``input``.

    Args:
        request: Django request carrying the form's POST data.

    Returns:
        The rendered ``home.html`` response.
    """
    string1 = request.POST['input']
    print(string1)
    # Fall back to 1 (n-gram), the radio button the form marks as checked,
    # when a client omits the field.
    label_value = int(request.POST.get('type', 1))
    # startswith() is safe for an empty submission, unlike indexing [0].
    if string1.startswith(' '):
        print("Entering the old logic")
        string2 = ul.old(string1, label_value)
    else:
        string2 = ul.hello(string1, label_value)
    return render(request, 'home.html', {'result': string2, 'input': string1})
\ No newline at end of file
.div1{ .div111{
background-color: #4158D0; background-color: #4158D0;
background-image: linear-gradient(43deg, #4158D0 0%, #C850C0 46%, #FFCC70 100%); background-image: linear-gradient(43deg, #4158D0 0%, #C850C0 46%, #FFCC70 100%);
margin: 25px; margin: 25px;
height: 300px; height: 300px;
border-radius: 75px; border-radius: 75px;
display: flex;
justify-content: center;
} }
.div2{ .div2{
...@@ -21,10 +24,14 @@ body{ ...@@ -21,10 +24,14 @@ body{
background-image: linear-gradient(120deg, #e0c3fc 0%, #8ec5fc 100%); background-image: linear-gradient(120deg, #e0c3fc 0%, #8ec5fc 100%);
} }
h1{ h1{
padding-top: 100px;
font-size: 100px; font-size: 100px;
padding-left: 100px;
text-align: center; text-align: center;
color: white; color: white;
} }
.input{ .input{
margin: 50px; margin: 50px;
...@@ -48,11 +55,11 @@ h1{ ...@@ -48,11 +55,11 @@ h1{
padding: 10px; padding: 10px;
color: darkblue; color: darkblue;
} }
.btn{ .btn1{
display: flex; display: flex;
justify-content: center; justify-content: center;
} }
.sub_btn{ .sub_btn1{
justify-content: center ; justify-content: center ;
width: 200px; width: 200px;
color: white; color: white;
...@@ -67,6 +74,7 @@ h1{ ...@@ -67,6 +74,7 @@ h1{
} }
.result{ .result{
padding: 20px;
margin: 50px; margin: 50px;
border-radius: 50px; border-radius: 50px;
text-align: center; text-align: center;
...@@ -79,10 +87,31 @@ h1{ ...@@ -79,10 +87,31 @@ h1{
} }
.image{ .image{
display: flex; display: flex;
justify-content: center; height: 200px;
padding-top: 50px;
padding-left: 50px;
} }
image{ .checks{
display: flex;
justify-content: center; justify-content: center;
height: 200px; }
width: 200px; .checks div{
margin: 50px;
}
div label{
font-size: 30px;
}
.select_statement{
font-size: 25px;
font-weight: bold;
text-align: center;
}
footer{
background-color: rgb(44, 42, 42);
color: white;
padding: 10px;
} }
\ No newline at end of file
# One-off setup script: downloads the NLTK resource named 'omw-1.4'.
# Presumably needed by the `nltk.corpus.wordnet` import in the infilling
# module -- confirm before relying on it.
import nltk
nltk.download('omw-1.4')
\ No newline at end of file
...@@ -7,13 +7,21 @@ ...@@ -7,13 +7,21 @@
<meta name="viewport" content="width=device-width, initial-scale=1.0"> <meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Text Infilling</title> <title>Text Infilling</title>
<link rel="stylesheet" href="{% static 'styles.css'%}"> <link rel="stylesheet" href="{% static 'styles.css'%}">
<!-- Required meta tags -->
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
<!-- Bootstrap CSS -->
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@4.3.1/dist/css/bootstrap.min.css" integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T" crossorigin="anonymous">
</head> </head>
<body> <body>
<div class="div111">
<div class="image"> <div class="image">
<img src="{%static 'cfilt.jpg'%}" alt="symbol"> <img src="{%static 'cfilt.jpg'%}" alt="symbol" style="border-radius: 25%;">
<h1 class="display-1" style="padding-top: 10px;">Text Infilling</h1>
</div> </div>
<div class="div1">
<h1>Text Infilling</h1>
</div> </div>
<div class="div2"> <div class="div2">
...@@ -21,22 +29,87 @@ ...@@ -21,22 +29,87 @@
<form action="result" method="post"> <form action="result" method="post">
{% csrf_token %} {% csrf_token %}
<div class="input">Enter the infilling string: <br> <input type="text" name ="input" placeholder="Enter the text to be infilled"><br></div> <div class="input">Enter the infilling string: <br> <input type="text" name ="input" placeholder="Enter the text to be infilled"><br></div>
<div class="btn"> <div class="select_statement">Please choose the type of infilling</div>
<input class="sub_btn" type="submit"> <div class="checks">
<div class="filling_type">
<input type="radio" id="vehicle1" name="type" value=0>
<label for="vehicle1">Infill word</label><br>
</div>
<div class="filling_type">
<input type="radio" id="vehicle3" name="type" value=1 checked>
<label for="vehicle3">Infill Ngram</label><br>
</div> </div>
<div class="filling_type">
<input type="radio" id="vehicle2" name="type" value=2>
<label for="vehicle2">Infill Sentence</label><br></div>
<!-- <div class="filling_type">
<input type="checkbox" id="vehicle2" name="vehicle2" value="Car">
<label for="vehicle2">Infill Sentence</label><br></div> -->
<!-- <div class="filling_type">
<input type="checkbox" id="vehicle2" name="vehicle2" value="Car">
<label for="vehicle2">Infill paragraph</label><br></div>
<div class="filling_type">
<input type="checkbox" id="vehicle2" name="vehicle2" value="Car">
<label for="vehicle2">Infill Sentence</label><br></div> -->
<!-- TODO: the commented-out checkbox markup around this note is leftover placeholder code; remove it later -->
<!-- <input type="checkbox" id="vehicle1" name="vehicle1" value="Bike">
<label for="vehicle1"> I have a bike</label><br> -->
<!-- <input type="checkbox" id="vehicle2" name="vehicle2" value="Car">
<label for="vehicle2"> I have a car</label><br> -->
<!-- <input type="checkbox" id="vehicle3" name="vehicle3" value="Boat">
<label for="vehicle3"> I have a boat</label><br> -->
</div>
<div class="btn1">
<input class="sub_btn1" type="submit">
</div>
</form> </form>
<div class="result"> <div class="result">
<h1>Input:</h1>
{% block include %} {% block include %}
{{ input }} <table class="table table-striped">
<thead class="thead-dark">
<tr>
<th scope="col">Input</th>
</tr>
</thead>
<tbody>
<tr>
<th scope="row">{{ input }}</th>
</tr>
</tbody>
</table>
{% endblock include %} {% endblock include %}
<h1>Result:</h1>
{% block includes %} {% block includes %}
{{ result }} <table class="table">
<thead class="thead-dark">
<tr>
<th scope="col">Result</th>
</tr>
</thead>
<tbody>
{% for res in result %}
<tr>
<th scope="row">{{res}}</th>
</tr>
{%endfor%}
</tbody>
</table>
{% endblock includes %} {% endblock includes %}
</div> </div>
<footer>
<p>Author: Dishank Aggarwal IIT Bombay MS RAP</p>
<!-- The href needs a scheme; without one the browser treats it as a path relative to the current page. -->
<p><a href="https://www.google.com">Cfilt IITB</a></p>
</footer>
</body> </body>
</html> </html>
\ No newline at end of file
...@@ -26,7 +26,7 @@ SECRET_KEY = 'django-insecure-^2se-l4rkpy&@u=r4ty#ped1gk@ww#5_orer1wn_wc8a9e#7%+ ...@@ -26,7 +26,7 @@ SECRET_KEY = 'django-insecure-^2se-l4rkpy&@u=r4ty#ped1gk@ww#5_orer1wn_wc8a9e#7%+
# SECURITY WARNING: don't run with debug turned on in production! # SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True DEBUG = True
# Hosts this site may be served from. The '*' wildcard is left out on purpose:
# it matches every Host header, which turns Django's host validation off and
# makes the explicit entries meaningless.
ALLOWED_HOSTS = ['localhost', '10.129.131.198']
# Application definition # Application definition
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment