Question generator
Question generation using NLP and transformers
1. Install the necessary modules
pip install sentence-transformers transformers scikit-learn torch spacy
python -m spacy download en_core_web_sm
2. Import the necessary modules
from sentence_transformers import SentenceTransformer
from transformers import T5ForConditionalGeneration, T5Tokenizer, BertTokenizer, BertModel, AutoTokenizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import torch
import spacy
from transformers import BertTokenizer, BertModel
from warnings import filterwarnings as filt
filt('ignore')
3. Initialize the pre-trained models
# BERT tokenizer and encoder, both from the 'bert-base-uncased' checkpoint.
bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert_model = BertModel.from_pretrained("bert-base-uncased")
# Sentence-embedding model; get_key_words() calls model.encode() when its
# module_type argument is anything other than 't'.
model = SentenceTransformer('distilbert-base-nli-mean-tokens')
# spaCy pipeline used by get_pos() and get_sent().
nlp = spacy.load("en_core_web_sm")
4. Create a function for generating a question.
# Hugging Face checkpoint used for question generation.
_QG_CHECKPOINT = 'ramsrigouthamg/t5_squad_v1'
# Populated on first use so the checkpoint is loaded once per process instead
# of once per generated question.
_qg_cache = {}


def _load_question_generator():
    """Return the cached ``(model, tokenizer)`` pair, loading it on first use."""
    if not _qg_cache:
        # Load both into locals first so a failure cannot leave the cache
        # half-filled.
        mdl = T5ForConditionalGeneration.from_pretrained(_QG_CHECKPOINT)
        tknizer = AutoTokenizer.from_pretrained(_QG_CHECKPOINT)
        _qg_cache['model'] = mdl
        _qg_cache['tokenizer'] = tknizer
    return _qg_cache['model'], _qg_cache['tokenizer']


def get_question(sentence, answer):
    """Generate one question about *sentence* whose answer is *answer*.

    The prompt ``"context: <sentence> answer: <answer>"`` is fed to the
    ``ramsrigouthamg/t5_squad_v1`` T5 checkpoint and decoded with beam search.

    Args:
        sentence: Context text the question should be about.
        answer: The answer (keyword) the generated question should lead to.

    Returns:
        The generated question as a string, with the ``"question:"`` label
        removed and surrounding whitespace stripped.
    """
    mdl, tknizer = _load_question_generator()
    text = "context: {} answer: {}".format(sentence, answer)
    max_len = 256
    # Padding is left at its default (off): a single sequence needs none.
    encoding = tknizer(text, max_length=max_len, truncation=True, return_tensors="pt")
    outs = mdl.generate(
        input_ids=encoding["input_ids"],
        attention_mask=encoding["attention_mask"],
        early_stopping=True,
        num_beams=5,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        max_length=300,
    )
    # num_return_sequences=1, so only the first decoded sequence is needed.
    question = tknizer.decode(outs[0], skip_special_tokens=True)
    return question.replace("question:", "").strip()
This function requires two parameters:
- sentence
- answer
It generates a question for the sentence based on the answer, using the t5_squad_v1 pre-trained model, and returns the generated question as a string.
The answer can be anything, such as a keyword taken from the sentence.
For example:
sentence :
tact labs is a company.
answer :
company
returns :
'what is tact labs'
5. Now create the helper functions that generate meaningful keywords (i.e. the answers)
def get_embedding(doc):
    """Embed *doc* with the module-level BERT tokenizer and model.

    The text is wrapped in ``[CLS]`` / ``[SEP]`` markers, tokenised, converted
    to ids and passed through ``bert_model`` as a batch of one.

    Args:
        doc: Text to embed.

    Returns:
        The last element of the model output as a NumPy array (presumably the
        pooled output of shape ``(1, hidden_size)`` -- TODO confirm against the
        installed transformers version).
    """
    # Build the model input from `doc`. Without this local assignment the name
    # `txt` would resolve to a module-level variable (or raise NameError).
    txt = '[CLS] ' + doc + ' [SEP]'
    tokens = bert_tokenizer.tokenize(txt)
    token_idx = bert_tokenizer.convert_tokens_to_ids(tokens)
    # Every position is a real token (no padding), hence a mask of all ones.
    mask = [1] * len(tokens)
    torch_token = torch.tensor([token_idx])
    torch_mask = torch.tensor([mask])
    # Inference only: no gradients are needed.
    with torch.no_grad():
        output = bert_model(torch_token, attention_mask=torch_mask)
    return output[-1].detach().numpy()
def get_pos(context):
    """Return the POS tags and the whitespace-separated words of *context*.

    Args:
        context: Text to analyse.

    Returns:
        A ``(pos_tags, words)`` tuple. ``pos_tags`` holds one ``pos_`` value
        per token of the spaCy pipeline; ``words`` is ``context.split()``.
        The two lists come from different tokenisations, so they are not
        guaranteed to have the same length.
    """
    pos_tags = [token.pos_ for token in nlp(context)]
    words = context.split()
    return pos_tags, words
def get_sent(context):
    """Split *context* into sentences with the module-level spaCy pipeline.

    Args:
        context: Text to split.

    Returns:
        A list with the items of the parsed document's ``sents``, in order.
    """
    parsed = nlp(context)
    return [sentence for sentence in parsed.sents]
def get_vector(doc):
    """Return the unigram vocabulary of *doc* with English stop words removed.

    Args:
        doc: A single text (one sentence in this file's usage).

    Returns:
        A list of the feature names learned by ``CountVectorizer`` from *doc*.

    Raises:
        ValueError: Propagated from ``CountVectorizer.fit`` when no terms
            remain (e.g. *doc* contains only stop words).
    """
    vectorizer = CountVectorizer(ngram_range=(1, 1), stop_words="english").fit([doc])
    # get_feature_names() was removed in scikit-learn 1.2. Prefer its
    # replacement and fall back only on old versions that lack it.
    if hasattr(vectorizer, "get_feature_names_out"):
        # The replacement returns an ndarray; callers expect a plain list.
        return vectorizer.get_feature_names_out().tolist()
    return vectorizer.get_feature_names()
def get_key_words(context, module_type='t'):
    """Pick up to five keywords per sentence of *context* to use as answers.

    Each sentence is split into candidate keywords with ``get_vector``. The
    sentence and every candidate are embedded, and the candidates most similar
    to the sentence (cosine similarity) are kept.

    Args:
        context: Text containing one or more sentences.
        module_type: ``'t'`` embeds with ``get_embedding`` (BERT); any other
            value embeds with the module-level SentenceTransformer ``model``.

    Returns:
        A list of ``(keyword, sentence)`` tuples. Within a sentence the
        keywords are ordered by increasing similarity.
    """
    keywords = []
    top_n = 5
    for sent in get_sent(context):
        sentence = str(sent)
        try:
            keywd = get_vector(sentence)
        except ValueError:
            # The sentence has no candidate keywords (e.g. only stop words);
            # skip it rather than abort the whole context.
            continue
        print(f'vectors : {keywd}')
        if module_type == 't':
            doc_embedding = get_embedding(sentence)
            # One embedding row per keyword. Embedding the joined keywords as
            # a single text gives a 1x1 similarity matrix, so only keywd[0]
            # could ever be selected.
            keywd_embedding = [get_embedding(word).ravel() for word in keywd]
        else:
            doc_embedding = model.encode([sentence])
            keywd_embedding = model.encode(keywd)
        distances = cosine_similarity(doc_embedding, keywd_embedding)
        print(distances)
        # argsort is ascending, so the last top_n indices are the most similar.
        keywords += [(keywd[index], sentence) for index in distances.argsort()[0][-top_n:]]
    return keywords
It uses sklearn's CountVectorizer to extract the candidate keywords and returns a list of tuples, each containing the answer (keyword) and the sentence it came from, which we can then pass to the get_question function to generate the questions.
txt = 'Mauricio Pochettino open to leaving Paris St-Germain if Man Utd make approach'
get_key_words(txt)
------------------------------------------------------------------
[('approach',
  'Mauricio Pochettino open to leaving Paris St-Germain if Man Utd make approach')]
Now let's use the above text to generate the questions.
# Generate one question per (keyword, sentence) pair, using the
# SentenceTransformer branch of get_key_words ('st' is any value other than 't').
txt = 'Mauricio Pochettino open to leaving Paris St-Germain if Man Utd make approach'
for ans, context in get_key_words(txt, 'st'):
    # Separator line followed by a blank line.
    print('=======================================', end='\n\n')
    # The question, followed by a blank line.
    print(get_question(context, ans), end='\n\n')
=================================================================
Who is open to leaving Paris St-Germain if Man Utd make an approach?
=======================================
Who is open to leaving Paris St-Germain if Man Utd make an approach?
=======================================
Is Mauricio Pochettino open to leaving Paris St-Germain?
=======================================
Pochettino is open to leaving Paris St-Germain if Man Utd make an approach?
=======================================
What city is Mauricio Pochettino open to leaving if Man Utd make an approach?