Commit 09f7d0db authored by Marek Medved
Browse files

backwards compatibility

parent 89ae9be6
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -23,9 +23,9 @@ def find_sentences_containing_answer(db):
        record = db.get_record(rid)
        containing_answer = persistent.list.PersistentList()

        for sent in get_content(record.answer_extraction, vocabulary):
        for sent in get_content(record.answer_extraction, vocabulary, old=False):
            ans_ext_lemma = ' '.join(replace_number_lemma(sent['sent']))
            for idx, sent_and_phrs in enumerate(get_content(kb.url2doc.get(record.text)['text'], vocabulary)):
            for idx, sent_and_phrs in enumerate(get_content(kb.url2doc.get(record.text)['text'], vocabulary, old=False)):
                doc_sent_content = " ".join(replace_number_lemma(sent_and_phrs["sent"]))
                if ans_ext_lemma in doc_sent_content:
                    containing_answer.append(idx)
+4 −4
Original line number Diff line number Diff line
@@ -87,7 +87,7 @@ def find_similar_senteces(db, tf_idf):
        record = db.get_record(rid)
        # sys.stderr.write(f'{rid}\n')

        for answer_selection_sent in get_content(record.answer_selection, vocabulary):
        for answer_selection_sent in get_content(record.answer_selection, vocabulary, old=False):

            # Answer selection vector enhanced by TF-IDF
            as_vec = []
@@ -102,7 +102,7 @@ def find_similar_senteces(db, tf_idf):
            v_as = np.mean(as_vec, axis=0)

            # Computing similar sentences within document
            for idx, sent_and_phrs in enumerate(get_content(kb.url2doc.get(record.text)['text'], vocabulary)):
            for idx, sent_and_phrs in enumerate(get_content(kb.url2doc.get(record.text)['text'], vocabulary, old=False)):
                if idx != record.text_answer_position:
                    vec_tf_idf = []
                    for x in sent_and_phrs['sent']:
@@ -160,10 +160,10 @@ def main():
            sorted_sim_sentences = sorted(similar_sentences, key=lambda x: x[1], reverse=True)
            record = db.get_record(rid)
            if args.verbose:
                print(' '.join(get_content(record.answer_selection, vocabulary, part='word')[0]))
                print(' '.join(get_content(record.answer_selection, vocabulary, old=False, part='word')[0]))
                for idx, score in sorted_sim_sentences[:10]:
                    print('{}: {}'.format(score, ' '.join(get_content(kb.url2doc.get(record.text)['text'],
                                                                      vocabulary, part='word')[idx]['sent'])))
                                                                      vocabulary, old=False, part='word')[idx]['sent'])))

            if args.number == 0:
                record.similar_answers[f'sents_similar'] = persistent.list.PersistentList(sorted_sim_sentences)
+11 −9
Original line number Diff line number Diff line
@@ -193,7 +193,7 @@ def get_content(data, vocabulary, old, part='', context_type='', preloaded=False
    return result


def get_record(db, record_id, old, word_parts='', context_type='', vocabulary=None, qa_type_dict=None,
def get_record(db, record_id, old, word_parts='', context_type='all', vocabulary=None, qa_type_dict=None,
               kb=None, preloaded=False):
    """
    :param db: ZODB object, link to database
@@ -268,11 +268,16 @@ def print_record(db, record_id, old, context_type=''):
        print(f'\ts: {" ".join([x["word"] for x in i["sent"]])}')

    print('similar_answers:')
    for key, value in record.similar_answers.items():
        print(f'\t{key}: {value}')
        # for idx, sent_and_phrs in enumerate(get_content_ctx(record.text, kb, vocabulary, part='word')):
        #     if idx in value:
        #         print(f'\t\ts: {" ".join(sent_and_phrs["sent"])}')
    for name, value in record.similar_answers.items():
        if name == 'sents_similar':
            print(f'\t{name}:')
            for s_idx, score in value:
                for idx, sent_and_phrs in enumerate(get_content(kb.url2doc.get(record.text)['text'], vocabulary, old,
                                                                part='w')):
                    if idx == s_idx:
                        print(f'\t\ts_{idx} ({score}): {" ".join([x["word"] for x in sent_and_phrs["sent"]])}')
        else:
            print(f'\t{name}: {value}')

    print(f'text_title:')
    for i in get_content(kb.url2doc.get(record.text)["title"], vocabulary, old, part="w"):
@@ -282,11 +287,8 @@ def print_record(db, record_id, old, context_type=''):
    for idx, sent_and_phrs in enumerate(get_content(kb.url2doc.get(record.text)['text'], vocabulary, old, part='w',
                                                    context_type=context_type)):
        text_sents_total += 1
        # print(sent_and_phrs['ctx'].keys())
        print(f'\ts_{idx}: {" ".join([x["word"] for x in sent_and_phrs["sent"]])}')
        for key, phrs in sent_and_phrs['ctx'].items():
            # print(phrs)

            try:
                print(f'\t\tctx_type: {key}')
                print_ctx(phrs)
+5 −3
Original line number Diff line number Diff line
@@ -65,7 +65,8 @@ def id2word(vocabulary, key, parts='', preloaded=False):
            try:
                result['v_bert'] = vocabulary['vectors'][key]['v_bert']
            except KeyError:
                sys.stderr.write(f'ERROR: not "v_bert" for: {vocabulary["id2wlt"][key]["word"]}\n')
                pass
                # sys.stderr.write(f'ERROR: not "v_bert" for: {vocabulary["id2wlt"][key]["word"]}\n')
        if 'id' in word_parts or not parts:
            result['id'] = key
    else:
@@ -77,7 +78,7 @@ def id2word(vocabulary, key, parts='', preloaded=False):
            result['tag'] = vocabulary.id2wlt[key]['tag']

        # Backwards compatibility
        if isinstance(vocabulary.vectors[key], dict):  # New
        if isinstance(vocabulary.vectors[key], BTree):  # New
            if 'v100' in word_parts or not parts:
                result['v100'] = vocabulary.vectors[key]['v100']
            if 'v300' in word_parts or not parts:
@@ -96,7 +97,8 @@ def id2word(vocabulary, key, parts='', preloaded=False):
            try:
                result['v_bert'] = vocabulary.vectors[key]['v_bert']
            except (KeyError, TypeError):
                sys.stderr.write(f'ERROR: not "v_bert" for: {vocabulary.id2wlt[key]["word"]}\n')
                pass
                # sys.stderr.write(f'ERROR: not "v_bert" for: {vocabulary.id2wlt[key]["word"]}\n')
        if 'id' in word_parts or not parts:
            result['id'] = key
    return result