Loading add_similar_senteces.py +8 −5 Original line number Diff line number Diff line Loading @@ -110,6 +110,9 @@ def find_similar_senteces(db, tf_idf): v_sent = np.mean(vec_tf_idf, axis=0) cos_sim = 1 - spatial.distance.cosine(v_as, v_sent) # Filter exact answers if not idx in record.similar_answers['sents_containing_ans_ext']: similar_senteces.append((idx, cos_sim)) yield rid, similar_senteces Loading @@ -133,15 +136,15 @@ def main(): vocabulary, _, kb = db.get_dicts() tf_idf = compute_tfidf(kb, vocabulary) for rid, similar_senteces in find_similar_senteces(db, tf_idf): sorted_sim_senteces = sorted(similar_senteces, key = lambda x: x[1], reverse=True) for rid, similar_sentences in find_similar_senteces(db, tf_idf): sorted_sim_sentences = sorted(similar_sentences, key = lambda x: x[1], reverse=True) record = db.get_record(rid) if args.verbose: print(' '.join(get_content(record.answer_selection, vocabulary, part='word')[0])) for idx, score in sorted_sim_senteces[:10]: for idx, score in sorted_sim_sentences[:10]: print('{}: {}'.format(score, ' '.join(get_content_ctx(record.text, kb, vocabulary, part='word')[idx]['sent']))) record.similar_answers[f'sents_similar_{args.number}'] = persistent.list.PersistentList(sorted_sim_senteces[:args.number]) record.similar_answers[f'sents_similar_{args.number}'] = persistent.list.PersistentList(sorted_sim_sentences[:args.number]) db._p_changed = True transaction.commit() db.close() Loading Loading
add_similar_senteces.py +8 −5 Original line number Diff line number Diff line Loading @@ -110,6 +110,9 @@ def find_similar_senteces(db, tf_idf): v_sent = np.mean(vec_tf_idf, axis=0) cos_sim = 1 - spatial.distance.cosine(v_as, v_sent) # Filter exact answers if not idx in record.similar_answers['sents_containing_ans_ext']: similar_senteces.append((idx, cos_sim)) yield rid, similar_senteces Loading @@ -133,15 +136,15 @@ def main(): vocabulary, _, kb = db.get_dicts() tf_idf = compute_tfidf(kb, vocabulary) for rid, similar_senteces in find_similar_senteces(db, tf_idf): sorted_sim_senteces = sorted(similar_senteces, key = lambda x: x[1], reverse=True) for rid, similar_sentences in find_similar_senteces(db, tf_idf): sorted_sim_sentences = sorted(similar_sentences, key = lambda x: x[1], reverse=True) record = db.get_record(rid) if args.verbose: print(' '.join(get_content(record.answer_selection, vocabulary, part='word')[0])) for idx, score in sorted_sim_senteces[:10]: for idx, score in sorted_sim_sentences[:10]: print('{}: {}'.format(score, ' '.join(get_content_ctx(record.text, kb, vocabulary, part='word')[idx]['sent']))) record.similar_answers[f'sents_similar_{args.number}'] = persistent.list.PersistentList(sorted_sim_senteces[:args.number]) record.similar_answers[f'sents_similar_{args.number}'] = persistent.list.PersistentList(sorted_sim_sentences[:args.number]) db._p_changed = True transaction.commit() db.close() Loading