Loading context_np.py +27 −4 Original line number Diff line number Diff line Loading @@ -70,7 +70,7 @@ def name_phrases(text, title, vocabulary, context_window, num_phr_per_sent, w2v) context_phrases = persistent.list.PersistentList() context_position = curr_sent_pos - 1 while (context_position >= 0) and (curr_sent_pos - context_position <= context_window): context_phrases += phrases_per_sentence[context_position][:num_phr_per_sent] context_phrases.append(phrases_per_sentence[context_position][:num_phr_per_sent]) context_position -= 1 # Title as a context for first sentence in document Loading @@ -81,7 +81,7 @@ def name_phrases(text, title, vocabulary, context_window, num_phr_per_sent, w2v) word, lemma, tag = token.strip().split('\t')[:3] wid = word2id(vocabulary, word, lemma, tag, w2v) title_phr.append(wid) context_phrases.append(title_phr) context_phrases.append([title_phr]) text_context.append(context_phrases) Loading Loading @@ -131,14 +131,37 @@ def add_np_phrases(db, context_window, num_phr_per_sent, w2v, verbose=False): for sent_num, sent in enumerate(text['text']): if verbose: print(f"s:{' '.join([id2word(vocabulary, x)['word'] for x in sent['sent']])}") for phr in phrases[sent_num]: print(f'\t\tp:{" ".join([id2word(vocabulary, x)["word"] for x in phr])}') for phrs in phrases[sent_num]: for phr in phrs: print(f'\t\tp:{" ".join([id2word(vocabulary, x)["word"] for x in phr])}') if not sent['ctx'].get(f'name_phrs_w{context_window}_n{num_phr_per_sent}'): sent['ctx'][f'name_phrs_w{context_window}_n{num_phr_per_sent}'] = phrases[sent_num] db._p_changed = True transaction.commit() def get_ctx(phrs, vocabulary, part): sentence_phrases = [] for sent_phr in phrs: phr_per_sent = [] for p in sent_phr: p_content = [] for w_id_cx in p: if part: p_content.append(id2word(vocabulary, w_id_cx, part)) else: p_content.append(id2word(vocabulary, w_id_cx)) phr_per_sent.append(p_content) sentence_phrases.append(phr_per_sent) return sentence_phrases def print_ctx(phrs): for idx, 
sent_phr in enumerate(phrs): for p in sent_phr: print(f'\t\t\tc(-{idx+1}): {" ".join([x["word"] for x in p])}') def main(): import argparse parser = argparse.ArgumentParser(description='Add noun phrases as context to sentences') Loading context_previous_senteces.py +18 −0 Original line number Diff line number Diff line Loading @@ -26,6 +26,24 @@ def add_ctx(db, number, verbose=False): db._p_changed = True transaction.commit() def get_ctx(phrs, vocabulary, part): content = [] for p in phrs: p_content = [] for w_id_cx in p: if part: p_content.append(id2word(vocabulary, w_id_cx, part)) else: p_content.append(id2word(vocabulary, w_id_cx)) content.append(p_content) return content def print_ctx(phrs): for idx, p in enumerate(phrs): print(f'\t\t\tc(-{idx+1}): {" ".join([x["word"] for x in p])}') def main(): import argparse Loading query_database.py +11 −13 Original line number Diff line number Diff line Loading @@ -5,6 +5,8 @@ from sqad_db import id2word from sqad_db import id2qt from pprint import pprint import sys import context_np import context_previous_senteces def get_ctx(data, vocabulary, part='', context_type=''): Loading @@ -15,17 +17,11 @@ def get_ctx(data, vocabulary, part='', context_type=''): required_ctx = ['all'] for ctx_type, phrs in data.items(): if ctx_type in required_ctx or 'all' in required_ctx: for p in phrs: p_content = [] for w_id_cx in p: if part: p_content.append(id2word(vocabulary, w_id_cx, part)) else: p_content.append(id2word(vocabulary, w_id_cx)) if sentence_phrases.get(ctx_type): sentence_phrases[ctx_type].append(p_content) else: sentence_phrases[ctx_type] = [p_content] if ctx_type.startswith('name_phrs'): sentence_phrases[ctx_type] = context_np.get_ctx(phrs, vocabulary, part) else: sentence_phrases[ctx_type] = context_previous_senteces.get_ctx(phrs, vocabulary, part) return sentence_phrases Loading Loading @@ -130,8 +126,10 @@ def print_record(db, record_id, context_type=''): print(f'\ts_{idx}: {" ".join([x["word"] for x in 
sent_and_phrs["sent"]])}') for key, phrs in sent_and_phrs['ctx'].items(): print(f'\t\tctx_type: {key}') for p in phrs: print(f'\t\t\tc: {" ".join([x["word"] for x in p])}') if key.startswith('name_phrs'): context_np.print_ctx(phrs) else: context_previous_senteces.print_ctx(phrs) print('No. text sentences that contain answer') print(f'\t{len(record.similar_answers["sents_containing_ans_ext"])}') Loading Loading
# context_np.py  +27 −4   (excerpt of a rendered diff)
#
# The three hunks below are context for definitions that start outside this
# excerpt; they are kept verbatim, in source order.  Removed and added lines
# appear fused without +/- markers -- presumably the first line of each
# near-duplicate pair is the removed one (TODO confirm against the repository).
#
# @@ -70,7 +70,7 @@ def name_phrases(text, title, vocabulary, context_window, num_phr_per_sent, w2v)
#   context_phrases = persistent.list.PersistentList()
#   context_position = curr_sent_pos - 1
#   while (context_position >= 0) and (curr_sent_pos - context_position <= context_window):
#   context_phrases += phrases_per_sentence[context_position][:num_phr_per_sent]
#   context_phrases.append(phrases_per_sentence[context_position][:num_phr_per_sent])
#   context_position -= 1
#   # Title as a context for first sentence in document
#
# @@ -81,7 +81,7 @@ def name_phrases(text, title, vocabulary, context_window, num_phr_per_sent, w2v)
#   word, lemma, tag = token.strip().split('\t')[:3]
#   wid = word2id(vocabulary, word, lemma, tag, w2v)
#   title_phr.append(wid)
#   context_phrases.append(title_phr)
#   context_phrases.append([title_phr])
#   text_context.append(context_phrases)
#
# @@ -131,14 +131,37 @@ def add_np_phrases(db, context_window, num_phr_per_sent, w2v, verbose=False):
#   for sent_num, sent in enumerate(text['text']):
#   if verbose:
#   print(f"s:{' '.join([id2word(vocabulary, x)['word'] for x in sent['sent']])}")
#   for phr in phrases[sent_num]:
#   print(f'\t\tp:{" ".join([id2word(vocabulary, x)["word"] for x in phr])}')
#   for phrs in phrases[sent_num]:
#   for phr in phrs:
#   print(f'\t\tp:{" ".join([id2word(vocabulary, x)["word"] for x in phr])}')
#   if not sent['ctx'].get(f'name_phrs_w{context_window}_n{num_phr_per_sent}'):
#   sent['ctx'][f'name_phrs_w{context_window}_n{num_phr_per_sent}'] = phrases[sent_num]
#   db._p_changed = True
#   transaction.commit()


def _lookup_phrase(word_ids, vocabulary, part):
    """Resolve one phrase (an iterable of word ids) through ``id2word``."""
    # ``part`` does not change per word, so branch once per phrase instead of
    # once per word id.
    if part:
        return [id2word(vocabulary, w_id, part) for w_id in word_ids]
    return [id2word(vocabulary, w_id) for w_id in word_ids]


def get_ctx(phrs, vocabulary, part):
    """Resolve a noun-phrase context from word ids via ``id2word``.

    The input is nested three levels deep: ``phrs`` holds one entry per
    context sentence, each entry holds phrases, and each phrase holds word
    ids.  The result mirrors that nesting exactly.

    Args:
        phrs: Iterable of per-sentence phrase collections.
        vocabulary: Passed through unchanged as the first argument of
            ``id2word``.
        part: When truthy, forwarded to ``id2word`` as a third argument;
            when falsy, ``id2word`` is called with two arguments only.

    Returns:
        A list (per sentence) of lists (per phrase) of lists holding whatever
        ``id2word`` returns for each word id.
    """
    return [
        [_lookup_phrase(p, vocabulary, part) for p in sent_phr]
        for sent_phr in phrs
    ]


def print_ctx(phrs):
    """Print every phrase of a resolved noun-phrase context, one per line.

    Each line is labelled ``c(-k)`` where ``k`` is the 1-based position of
    the phrase's sentence entry within ``phrs`` (presumably the distance back
    from the current sentence -- confirm against ``name_phrases``).

    Args:
        phrs: Iterable of per-sentence phrase collections; every word entry
            must support ``entry["word"]``.
    """
    for distance, sent_phr in enumerate(phrs, start=1):
        for p in sent_phr:
            words = " ".join(x["word"] for x in p)
            print(f'\t\t\tc(-{distance}): {words}')


# Trailing hunk context (the definition continues outside this excerpt):
#   def main():
#   import argparse
#   parser = argparse.ArgumentParser(description='Add noun phrases as context to sentences')
# context_previous_senteces.py  +18 −0   (excerpt of a rendered diff)
#
# @@ -26,6 +26,24 @@ def add_ctx(db, number, verbose=False):
# Leading hunk context (tail of add_ctx, whose definition starts outside this
# excerpt), kept verbatim:
#   db._p_changed = True
#   transaction.commit()


def get_ctx(phrs, vocabulary, part):
    """Resolve a previous-sentences context from word ids via ``id2word``.

    Args:
        phrs: Iterable of phrases, each an iterable of word ids.
        vocabulary: Passed through unchanged as the first argument of
            ``id2word``.
        part: When truthy, forwarded to ``id2word`` as a third argument;
            when falsy, ``id2word`` is called with two arguments only.

    Returns:
        A list with one inner list per phrase, holding whatever ``id2word``
        returns for each word id, in the original order.
    """
    # ``part`` is the same for every word, so choose the call shape once
    # rather than re-testing it inside the innermost loop.
    if part:
        return [[id2word(vocabulary, w_id, part) for w_id in p] for p in phrs]
    return [[id2word(vocabulary, w_id) for w_id in p] for p in phrs]


def print_ctx(phrs):
    """Print every phrase of a resolved context, one per line.

    Each line is labelled ``c(-k)`` where ``k`` is the 1-based position of
    the phrase within ``phrs`` (presumably the number of sentences back from
    the current one -- confirm against ``add_ctx``).

    Args:
        phrs: Iterable of phrases; every word entry must support
            ``entry["word"]``.
    """
    for distance, p in enumerate(phrs, start=1):
        words = " ".join(x["word"] for x in p)
        print(f'\t\t\tc(-{distance}): {words}')


# Trailing hunk context (the definition continues outside this excerpt):
#   def main():
#   import argparse
query_database.py +11 −13 Original line number Diff line number Diff line Loading @@ -5,6 +5,8 @@ from sqad_db import id2word from sqad_db import id2qt from pprint import pprint import sys import context_np import context_previous_senteces def get_ctx(data, vocabulary, part='', context_type=''): Loading @@ -15,17 +17,11 @@ def get_ctx(data, vocabulary, part='', context_type=''): required_ctx = ['all'] for ctx_type, phrs in data.items(): if ctx_type in required_ctx or 'all' in required_ctx: for p in phrs: p_content = [] for w_id_cx in p: if part: p_content.append(id2word(vocabulary, w_id_cx, part)) else: p_content.append(id2word(vocabulary, w_id_cx)) if sentence_phrases.get(ctx_type): sentence_phrases[ctx_type].append(p_content) else: sentence_phrases[ctx_type] = [p_content] if ctx_type.startswith('name_phrs'): sentence_phrases[ctx_type] = context_np.get_ctx(phrs, vocabulary, part) else: sentence_phrases[ctx_type] = context_previous_senteces.get_ctx(phrs, vocabulary, part) return sentence_phrases Loading Loading @@ -130,8 +126,10 @@ def print_record(db, record_id, context_type=''): print(f'\ts_{idx}: {" ".join([x["word"] for x in sent_and_phrs["sent"]])}') for key, phrs in sent_and_phrs['ctx'].items(): print(f'\t\tctx_type: {key}') for p in phrs: print(f'\t\t\tc: {" ".join([x["word"] for x in p])}') if key.startswith('name_phrs'): context_np.print_ctx(phrs) else: context_previous_senteces.print_ctx(phrs) print('No. text sentences that contain answer') print(f'\t{len(record.similar_answers["sents_containing_ans_ext"])}') Loading