From 09f7d0db31c00998ff13643a37966673f45a9942 Mon Sep 17 00:00:00 2001
From: Marek Medved <marek.medved3@gmail.com>
Date: Thu, 29 Apr 2021 13:57:48 +0200
Subject: [PATCH] backwards compatibility

---
 add_contains_answer_sentences.py |  4 ++--
 add_similar_senteces.py          |  8 ++++----
 query_database.py                | 20 +++++++++++---------
 sqad_db.py                       |  8 +++++---
 4 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/add_contains_answer_sentences.py b/add_contains_answer_sentences.py
index fa4bc71..d3be7da 100755
--- a/add_contains_answer_sentences.py
+++ b/add_contains_answer_sentences.py
@@ -23,9 +23,9 @@ def find_sentences_containing_answer(db):
         record = db.get_record(rid)

         containing_answer = persistent.list.PersistentList()
-        for sent in get_content(record.answer_extraction, vocabulary):
+        for sent in get_content(record.answer_extraction, vocabulary, old=False):
             ans_ext_lemma = ' '.join(replace_number_lemma(sent['sent']))
-            for idx, sent_and_phrs in enumerate(get_content(kb.url2doc.get(record.text)['text'], vocabulary)):
+            for idx, sent_and_phrs in enumerate(get_content(kb.url2doc.get(record.text)['text'], vocabulary, old=False)):
                 doc_sent_content = " ".join(replace_number_lemma(sent_and_phrs["sent"]))
                 if ans_ext_lemma in doc_sent_content:
                     containing_answer.append(idx)
diff --git a/add_similar_senteces.py b/add_similar_senteces.py
index ae113bf..20c09fc 100755
--- a/add_similar_senteces.py
+++ b/add_similar_senteces.py
@@ -87,7 +87,7 @@ def find_similar_senteces(db, tf_idf):
         record = db.get_record(rid)
         # sys.stderr.write(f'{rid}\n')

-        for answer_selection_sent in get_content(record.answer_selection, vocabulary):
+        for answer_selection_sent in get_content(record.answer_selection, vocabulary, old=False):

             # Answer selection vector enhanced by TF-IDF
             as_vec = []
@@ -102,7 +102,7 @@ def find_similar_senteces(db, tf_idf):
             v_as = np.mean(as_vec, axis=0)

             # Computing similar sentences within document
-            for idx, sent_and_phrs in enumerate(get_content(kb.url2doc.get(record.text)['text'], vocabulary)):
+            for idx, sent_and_phrs in enumerate(get_content(kb.url2doc.get(record.text)['text'], vocabulary, old=False)):
                 if idx != record.text_answer_position:
                     vec_tf_idf = []
                     for x in sent_and_phrs['sent']:
@@ -160,10 +160,10 @@ def main():
         sorted_sim_sentences = sorted(similar_sentences, key=lambda x: x[1], reverse=True)
         record = db.get_record(rid)
         if args.verbose:
-            print(' '.join(get_content(record.answer_selection, vocabulary, part='word')[0]))
+            print(' '.join(get_content(record.answer_selection, vocabulary, old=False, part='word')[0]))
             for idx, score in sorted_sim_sentences[:10]:
                 print('{}: {}'.format(score, ' '.join(get_content(kb.url2doc.get(record.text)['text'],
-                                                                  vocabulary, part='word')[idx]['sent'])))
+                                                                  vocabulary, old=False, part='word')[idx]['sent'])))

         if args.number == 0:
             record.similar_answers[f'sents_similar'] = persistent.list.PersistentList(sorted_sim_sentences)
diff --git a/query_database.py b/query_database.py
index 9e8edda..9c6f805 100755
--- a/query_database.py
+++ b/query_database.py
@@ -193,7 +193,7 @@ def get_content(data, vocabulary, old, part='', context_type='', preloaded=False
     return result


-def get_record(db, record_id, old, word_parts='', context_type='', vocabulary=None, qa_type_dict=None,
+def get_record(db, record_id, old, word_parts='', context_type='all', vocabulary=None, qa_type_dict=None,
               kb=None, preloaded=False):
     """
     :param db: ZODB object, link to database
@@ -268,11 +268,16 @@ def print_record(db, record_id, old, context_type=''):
             print(f'\ts: {" ".join([x["word"] for x in i["sent"]])}')

     print('similar_answers:')
-    for key, value in record.similar_answers.items():
-        print(f'\t{key}: {value}')
-        # for idx, sent_and_phrs in enumerate(get_content_ctx(record.text, kb, vocabulary, part='word')):
-        #     if idx in value:
-        #         print(f'\t\ts: {" ".join(sent_and_phrs["sent"])}')
+    for name, value in record.similar_answers.items():
+        if name == 'sents_similar':
+            print(f'\t{name}:')
+            for s_idx, score in value:
+                for idx, sent_and_phrs in enumerate(get_content(kb.url2doc.get(record.text)['text'], vocabulary, old,
+                                                                part='w')):
+                    if idx == s_idx:
+                        print(f'\t\ts_{idx} ({score}): {" ".join([x["word"] for x in sent_and_phrs["sent"]])}')
+        else:
+            print(f'\t{name}: {value}')

     print(f'text_title:')
     for i in get_content(kb.url2doc.get(record.text)["title"], vocabulary, old, part="w"):
@@ -282,11 +287,8 @@ def print_record(db, record_id, old, context_type=''):
     for idx, sent_and_phrs in enumerate(get_content(kb.url2doc.get(record.text)['text'], vocabulary, old, part='w',
                                                     context_type=context_type)):
         text_sents_total += 1
-        # print(sent_and_phrs['ctx'].keys())
         print(f'\ts_{idx}: {" ".join([x["word"] for x in sent_and_phrs["sent"]])}')
         for key, phrs in sent_and_phrs['ctx'].items():
-            # print(phrs)
-
             try:
                 print(f'\t\tctx_type: {key}')
                 print_ctx(phrs)
diff --git a/sqad_db.py b/sqad_db.py
index ad037fd..848c0ca 100755
--- a/sqad_db.py
+++ b/sqad_db.py
@@ -65,7 +65,8 @@ def id2word(vocabulary, key, parts='', preloaded=False):
             try:
                 result['v_bert'] = vocabulary['vectors'][key]['v_bert']
             except KeyError:
-                sys.stderr.write(f'ERROR: not "v_bert" for: {vocabulary["id2wlt"][key]["word"]}\n')
+                pass
+                # sys.stderr.write(f'ERROR: not "v_bert" for: {vocabulary["id2wlt"][key]["word"]}\n')
         if 'id' in word_parts or not parts:
             result['id'] = key
     else:
@@ -77,7 +78,7 @@ def id2word(vocabulary, key, parts='', preloaded=False):
             result['tag'] = vocabulary.id2wlt[key]['tag']

         # Backwards compatibility
-        if isinstance(vocabulary.vectors[key], dict):  # New
+        if isinstance(vocabulary.vectors[key], BTree):  # New
             if 'v100' in word_parts or not parts:
                 result['v100'] = vocabulary.vectors[key]['v100']
             if 'v300' in word_parts or not parts:
@@ -96,7 +97,8 @@ def id2word(vocabulary, key, parts='', preloaded=False):
             try:
                 result['v_bert'] = vocabulary.vectors[key]['v_bert']
             except (KeyError, TypeError):
-                sys.stderr.write(f'ERROR: not "v_bert" for: {vocabulary.id2wlt[key]["word"]}\n')
+                pass
+                # sys.stderr.write(f'ERROR: not "v_bert" for: {vocabulary.id2wlt[key]["word"]}\n')
         if 'id' in word_parts or not parts:
             result['id'] = key
     return result
-- 
GitLab