Commit bc032501 authored by Marek Medved
Browse files

remove exact answers from similar sentences

parent 6e477f3b
Loading
Loading
Loading
Loading
+8 −5
Original line number Diff line number Diff line
@@ -110,6 +110,9 @@ def find_similar_senteces(db, tf_idf):
                    v_sent = np.mean(vec_tf_idf, axis=0)

                    cos_sim = 1 - spatial.distance.cosine(v_as, v_sent)

                    # Filter exact answers
                    if not idx in record.similar_answers['sents_containing_ans_ext']:
                        similar_senteces.append((idx, cos_sim))

        yield rid, similar_senteces
@@ -133,15 +136,15 @@ def main():
    vocabulary, _, kb = db.get_dicts()
    tf_idf = compute_tfidf(kb, vocabulary)

    for rid, similar_senteces in find_similar_senteces(db, tf_idf):
        sorted_sim_senteces = sorted(similar_senteces, key = lambda x: x[1], reverse=True)
    for rid, similar_sentences in find_similar_senteces(db, tf_idf):
        sorted_sim_sentences = sorted(similar_sentences, key = lambda x: x[1], reverse=True)
        record = db.get_record(rid)
        if args.verbose:
            print(' '.join(get_content(record.answer_selection, vocabulary, part='word')[0]))
            for idx, score in sorted_sim_senteces[:10]:
            for idx, score in sorted_sim_sentences[:10]:
                print('{}: {}'.format(score, ' '.join(get_content_ctx(record.text, kb, vocabulary, part='word')[idx]['sent'])))

        record.similar_answers[f'sents_similar_{args.number}'] = persistent.list.PersistentList(sorted_sim_senteces[:args.number])
        record.similar_answers[f'sents_similar_{args.number}'] = persistent.list.PersistentList(sorted_sim_sentences[:args.number])
        db._p_changed = True
        transaction.commit()
    db.close()