New changes are to be uploaded

a3502f7f · Dishank · 2ead04fd · a3502f7f · a3502f7f · a3502f7f
Commit a3502f7f authored May 11, 2022 by Dishank
19 changed files
--- a/filling/__pycache__/__init__.cpython-38.pyc
+++ b/filling/__pycache__/__init__.cpython-38.pyc
--- a/filling/__pycache__/urls.cpython-38.pyc
+++ b/filling/__pycache__/urls.cpython-38.pyc
--- a/filling/__pycache__/utility1.cpython-38.pyc
+++ b/filling/__pycache__/utility1.cpython-38.pyc
--- a/filling/__pycache__/views.cpython-38.pyc
+++ b/filling/__pycache__/views.cpython-38.pyc
--- a/filling/ilm/__pycache__/constants.cpython-38.pyc
+++ b/filling/ilm/__pycache__/constants.cpython-38.pyc
--- a/filling/ilm/__pycache__/infer.cpython-38.pyc
+++ b/filling/ilm/__pycache__/infer.cpython-38.pyc
--- a/filling/ilm/__pycache__/paths.cpython-38.pyc
+++ b/filling/ilm/__pycache__/paths.cpython-38.pyc
--- a/filling/ilm/__pycache__/tokenize_util.cpython-38.pyc
+++ b/filling/ilm/__pycache__/tokenize_util.cpython-38.pyc
--- a/filling/ilm/official_gpt2_encoder/__pycache__/encoder.cpython-38.pyc
+++ b/filling/ilm/official_gpt2_encoder/__pycache__/encoder.cpython-38.pyc
--- a/filling/utility1.py
+++ b/filling/utility1.py
@@ -8,6 +8,13 @@ from filling.ilm.infer import infill_with_ilm
 from nltk.corpus import wordnet as wn
 from nltk.corpus import stopwords
 from nltk.tokenize import word_tokenize, sent_tokenize
+from transformers import AutoModelForMaskedLM, AutoTokenizer
+import numpy as np
+model_name = 'cointegrated/rubert-tiny'  # checkpoint name shared by the masked LM and its tokenizer below
+model1 = AutoModelForMaskedLM.from_pretrained(model_name)  # created at import time; passed to score() as `model`
+tokenizer1 = AutoTokenizer.from_pretrained(model_name)  # created at import time; passed to score() as `tokenizer`
 stop_words = set(stopwords.words('english'))
 MODEL_DIR = None
@@ -33,11 +40,31 @@ model = GPT2LMHeadModel.from_pretrained(MODEL_DIR)
 model.eval()
 _ = model.to(device)
-def hello(str):
+def score(model, tokenizer, sentence):  # Score `sentence` with a masked LM: exp() of the loss over copies that each hide one token.
+    tensor_input = tokenizer.encode(sentence, return_tensors='pt')  # token ids as a PyTorch tensor; assumes shape (1, length) — TODO confirm
+    repeat_input = tensor_input.repeat(tensor_input.size(-1)-2, 1)  # (length - 2) stacked copies of the id row
+    mask = torch.ones(tensor_input.size(-1) - 1).diag(1)[:-2]  # ones on the first superdiagonal, last two rows dropped: row i selects position i + 1
+    masked_input = repeat_input.masked_fill(mask == 1, tokenizer.mask_token_id)  # copy i has position i + 1 replaced by the mask token
+    labels = repeat_input.masked_fill( masked_input != tokenizer.mask_token_id, -100)  # original ids kept only where masked; everything else becomes -100
+    with torch.inference_mode():
+        loss = model(masked_input, labels=labels).loss
+    return np.exp(loss.item())  # exponentiated loss; presumably lower means a more fluent sentence — TODO confirm
+def hello(str , label_value):
    modified_context = str
    modified_context = modified_context.replace(' _',"")
    tokenized = sent_tokenize(modified_context)
    word_list=[]
+    infill_type=""
+    if label_value==0:
+        infill_type = "<|infill_word|>"
+    elif label_value==1:
+        infill_type = '<|infill_ngram|>'
+    elif label_value ==2:
+        infill_type = '<|infill_sentence|>'
    for i in tokenized:
        # Word tokenizers is used to find the words
@@ -66,14 +93,64 @@ def hello(str):
    # print(str)
    context_ids = filling.ilm.tokenize_util.encode(str, tokenizer)
    _blank_id = filling.ilm.tokenize_util.encode(' _', tokenizer)[0]
-    context_ids[context_ids.index(_blank_id)] = additional_tokens_to_ids['<|infill_ngram|>']
+    no_infill = str.count(' _')
+    for i in range(no_infill):
+        print(i)
+        i+=1
+        context_ids[context_ids.index(_blank_id)] = additional_tokens_to_ids[infill_type]
    print(filling.ilm.tokenize_util.decode(context_ids,tokenizer))
    generated = infill_with_ilm(
        model,
        additional_tokens_to_ids,
        context_ids,
-        num_infills=1)
+        num_infills=3)
+    result=[]
+    for g in generated:
+        if length:
+            temp = filling.ilm.tokenize_util.decode(g,tokenizer)[:-length]
+        else:
+            temp = filling.ilm.tokenize_util.decode(g,tokenizer)
+        print(temp)
+        result.append(temp)
+        print(score(model1, tokenizer1, sentence=temp))
+        score1=score(model1, tokenizer1, sentence=temp)
+        print(type(score1))
+        # temp2= ""+score
+        # result+= temp2+"\n\n"
+    # print(result)
+    return result
+def old(str , label_value):  # Replace the first ' _' blank in `str` with an infill token and return the decoded candidates as a list.
+    modified_context = str
+    modified_context = modified_context.replace(' _',"")  # NOTE(review): modified_context is never read again in this function
+    infill_type=""
+    if label_value==0:  # label_value picks the infill token: 0 = word, 1 = ngram, 2 = sentence
+        infill_type = "<|infill_word|>"
+    elif label_value==1:
+        infill_type = '<|infill_ngram|>'
+    elif label_value ==2:
+        infill_type = '<|infill_sentence|>'
+    context_ids = filling.ilm.tokenize_util.encode(str, tokenizer)
+    _blank_id = filling.ilm.tokenize_util.encode(' _', tokenizer)[0]  # first id produced when encoding ' _'
+    context_ids[context_ids.index(_blank_id)] = additional_tokens_to_ids[infill_type]  # only the first blank is replaced; any other label_value leaves infill_type as "" for this lookup
+    print(filling.ilm.tokenize_util.decode(context_ids,tokenizer))
+    generated = infill_with_ilm(
+        model,
+        additional_tokens_to_ids,
+        context_ids,
+        num_infills=3)  # presumably requests three candidates — TODO confirm against infill_with_ilm
+    result=[]
    for g in generated:
-        result = filling.ilm.tokenize_util.decode(g,tokenizer)[:-length]
+        temp = filling.ilm.tokenize_util.decode(g,tokenizer)
+        print(temp)
+        result.append(temp)
+        print(score(model1, tokenizer1, sentence=temp))
+        score1=score(model1, tokenizer1, sentence=temp)  # NOTE(review): score() runs twice per candidate; score1 is only used by the type print below
+        print(type(score1))
+        # temp2= ""+score
+        # result+= temp2+"\n\n"
    return result
\ No newline at end of file
--- a/filling/views.py
+++ b/filling/views.py
@@ -10,10 +10,17 @@ def home(request):
 def result(request):  # Read the posted text and infill type, run the infilling, and render home.html with the input and results.
+    flag=1  # 1 -> ul.hello, 0 -> ul.old
    string1 =(request.POST['input'])
    print(string1)
-    string2 = ul.hello(string1)
+    if string1[0]==' ':  # a leading space routes the request to ul.old; NOTE(review): string1[0] raises IndexError on an empty string
+        flag =0
+    label_value = (request.POST['type'])
+    label_value=int(label_value)  # the posted value is converted to int before being passed on
+    if flag:
+        string2 = ul.hello(string1, label_value)
+    else:
+        print("Entering the old logic")
+        string2 = ul.old(string1,label_value)
    return render(request, 'home.html' , {'result':string2 , 'input':string1})
\ No newline at end of file
--- a/static/styles.css
+++ b/static/styles.css
-.div1{
+.div111{
    background-color: #4158D0;
 background-image: linear-gradient(43deg, #4158D0 0%, #C850C0 46%, #FFCC70 100%);
 margin: 25px;
    height: 300px;
    border-radius: 75px;
+    display: flex;
+    justify-content: center;
 }
 .div2{
@@ -21,10 +24,14 @@ body{
    background-image: linear-gradient(120deg, #e0c3fc 0%, #8ec5fc 100%);
 }
 h1{
-    padding-top: 100px;
    font-size: 100px;
+    padding-left: 100px;
    text-align: center;
    color: white;
 }
 .input{
    margin: 50px;
@@ -48,11 +55,11 @@ h1{
    padding: 10px;
    color: darkblue;
 }
-.btn{
+.btn1{
    display: flex;
    justify-content: center;
 }
-.sub_btn{
+.sub_btn1{
    justify-content: center ;
    width: 200px;
    color: white;
@@ -67,6 +74,7 @@ h1{
 }
 .result{
+    padding: 20px;
    margin: 50px;
    border-radius: 50px;
    text-align: center;
@@ -79,10 +87,31 @@ h1{
 }
 .image{
    display: flex;
-    justify-content: center;
+    height: 200px;
+    padding-top: 50px;
+    padding-left: 50px;
 }
-image{
+.checks{
+    display: flex;
    justify-content: center;
-    height: 200px;
+}
-    width: 200px;
+.checks div{
+    margin: 50px;
+}
+div label{
+    font-size: 30px;
+}
+.select_statement{
+    font-size: 25px;
+    font-weight: bold;
+    text-align: center;
+}
+footer{
+    background-color: rgb(44, 42, 42);
+    color: white;
+    padding: 10px;
 }
\ No newline at end of file
--- a/temp.py
+++ b/temp.py
+import nltk
+nltk.download('omw-1.4')
\ No newline at end of file
--- a/templates/home.html
+++ b/templates/home.html
@@ -7,13 +7,21 @@
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Text Infilling</title>
    <link rel="stylesheet" href="{% static 'styles.css'%}">
+    <!-- Required meta tags -->
+    <meta charset="utf-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
+    <!-- Bootstrap CSS -->
+    <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@4.3.1/dist/css/bootstrap.min.css" integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T" crossorigin="anonymous">
 </head>
 <body>
+    <div class="div111">
        <div class="image">
-        <img src="{%static 'cfilt.jpg'%}" alt="symbol">
+            <img src="{%static 'cfilt.jpg'%}" alt="symbol" style="border-radius: 25%;">
+            <h1 class="display-1" style="padding-top: 10px;">Text Infilling</h1>
        </div>
-    <div class="div1">
-        <h1>Text Infilling</h1>
    </div>
    <div class="div2">
@@ -21,22 +29,87 @@
    <form action="result" method="post">
        {% csrf_token %}
        <div class="input">Enter the infilling string: <br> <input type="text" name ="input" placeholder="Enter the text to be infilled"><br></div>
-        <div class="btn">
+        <div class="select_statement">Please choose the type of infilling</div>
-            <input class="sub_btn" type="submit">
+        <div class="checks">
+            <div class="filling_type">
+                <input type="radio" id="vehicle1" name="type" value=0>
+                <label for="vehicle1">Infill word</label><br>
+            </div>
+            <div class="filling_type">
+                <input type="radio" id="vehicle3" name="type" value=1 checked>
+            <label for="vehicle3">Infill Ngram</label><br>
            </div>
+            <div class="filling_type"> 
+                <input type="radio" id="vehicle2" name="type" value=2>
+                <label for="vehicle2">Infill Sentence</label><br></div>
+            <!-- <div class="filling_type"> 
+                <input type="checkbox" id="vehicle2" name="vehicle2" value="Car">
+                <label for="vehicle2">Infill Sentence</label><br></div> -->
+            <!-- <div class="filling_type"> 
+                <input type="checkbox" id="vehicle2" name="vehicle2" value="Car">
+                <label for="vehicle2">Infill paragraph</label><br></div>
+            <div class="filling_type"> 
+                <input type="checkbox" id="vehicle2" name="vehicle2" value="Car">
+                <label for="vehicle2">Infill Sentence</label><br></div>         -->
+                <!-- TODO: the commented-out checkbox markup in this block is leftover placeholder code; remove it -->
+            <!-- <input type="checkbox" id="vehicle1" name="vehicle1" value="Bike">
+            <label for="vehicle1"> I have a bike</label><br> -->
+            <!-- <input type="checkbox" id="vehicle2" name="vehicle2" value="Car">
+            <label for="vehicle2"> I have a car</label><br> -->
+            <!-- <input type="checkbox" id="vehicle3" name="vehicle3" value="Boat">
+            <label for="vehicle3"> I have a boat</label><br> -->
+        </div>
+        <div class="btn1">
+            <input class="sub_btn1" type="submit">
+        </div>
    </form>
    <div class="result">
-        <h1>Input:</h1>
        {% block include %}
-        {{ input }}
+        <table class="table table-striped">
+            <thead class="thead-dark">
+              <tr>
+                <th scope="col">Input</th>
+              </tr>
+            </thead>
+            <tbody>
+              <tr>
+                <th scope="row">{{ input }}</th>
+              </tr>
+            </tbody>
+          </table>
        {% endblock include %}
-        <h1>Result:</h1>
        {% block includes %}
-        {{ result }}
+        <table class="table">
+            <thead class="thead-dark">
+              <tr>
+                <th scope="col">Result</th>
+              </tr>
+            </thead>
+            <tbody>
+                {% for res in result %}
+              <tr>
+                <th scope="row">{{res}}</th>
+              </tr>
+              {%endfor%}
+            </tbody>
+          </table>
        {% endblock includes %}
    </div>
+    <footer> <!-- page footer: author credit and external link -->
+        <p>Author: Dishank Aggarwal IIT Bombay MS RAP</p>
+        <p><a href="https://www.google.com">Cfilt IITB</a></p> <!-- absolute URL: a scheme-less href is resolved relative to the current page -->
+      </footer>
 </body>
 </html>
\ No newline at end of file
--- a/textInfilling/__pycache__/__init__.cpython-38.pyc
+++ b/textInfilling/__pycache__/__init__.cpython-38.pyc
--- a/textInfilling/__pycache__/settings.cpython-38.pyc
+++ b/textInfilling/__pycache__/settings.cpython-38.pyc
--- a/textInfilling/__pycache__/urls.cpython-38.pyc
+++ b/textInfilling/__pycache__/urls.cpython-38.pyc
--- a/textInfilling/__pycache__/wsgi.cpython-38.pyc
+++ b/textInfilling/__pycache__/wsgi.cpython-38.pyc
--- a/textInfilling/settings.py
+++ b/textInfilling/settings.py
@@ -26,7 +26,7 @@ SECRET_KEY = 'django-insecure-^2se-l4rkpy&@u=r4ty#ped1gk@ww#5_orer1wn_wc8a9e#7%+
 # SECURITY WARNING: don't run with debug turned on in production!
 DEBUG = True
-ALLOWED_HOSTS = []
+ALLOWED_HOSTS = ['localhost','10.129.131.198','*']  # NOTE(review): '*' presumably accepts any Host header, making the two explicit entries redundant — confirm this is intended while DEBUG is True
 # Application definition