Loading Makefile +10 −6 Original line number Diff line number Diff line DB_NAME=sqad_v3_$(shell date +"%d-%m-%Y_%H-%M-%S") create: ./sqad2database.py -p /nlp/projekty/sqad/sqad_v3/data -n sqad_db/$(DB_NAME) ./add_contains_answer_sentences.py -d sqad_db/$(DB_NAME) ./add_similar_senteces.py -d sqad_db/$(DB_NAME) -n 300 ./context_np.py -d sqad_db/$(DB_NAME) ./context_one_previous_sentece.py -d sqad_db/$(DB_NAME) ./query_database.py -d sqad_db/$(DB_NAME) -r 000001 --simple echo "SQAD to DB\n=======================" > $(DB_NAME).log ./sqad2database.py -p /nlp/projekty/sqad/sqad_v3/data -n $(DB_NAME) 2> $(DB_NAME).log echo "Contains answer\n======================" > $(DB_NAME).log ./add_contains_answer_sentences.py -d sqad_db/$(DB_NAME) 2>> $(DB_NAME).log echo "Similar answers\n======================" > $(DB_NAME).log ./add_similar_senteces.py -d sqad_db/$(DB_NAME) 2>> $(DB_NAME).log echo "Contex NP frases\n======================" > $(DB_NAME).log ./context_np.py -d sqad_db/$(DB_NAME) 2>> $(DB_NAME).log echo "Context previous sentece\n======================" > $(DB_NAME).log ./context_previous_senteces.py -d sqad_db/$(DB_NAME) 2>> $(DB_NAME).log No newline at end of file add_contains_answer_sentences.py +5 −1 Original line number Diff line number Diff line Loading @@ -39,11 +39,15 @@ def main(): parser.add_argument('-d', '--db_path', type=str, required=True, help='Database path') parser.add_argument('-v', '--verbose', action='store_true', required=True, default=False, help='Verbose mode') args = parser.parse_args() db = SqadDb(args.db_path) for record, sent_containing_answer in find_sentences_containing_answer(db): print(f'{record.rec_id}: {sent_containing_answer}') if args.verbose: print(f'{record.rec_id}: {sent_containing_answer}') record.similar_answers['sents_containing_ans_ext'] = sent_containing_answer db._p_changed = True transaction.commit() Loading context_np.py +4 −5 Original line number Diff line number Diff line Loading @@ -54,10 +54,10 @@ def name_phrases(text, title, vocabulary, context_window, num_phr_per_sent, w2v) try: word, lemma, tag = spl[:3] except ValueError as e: print(f'Something goes wrong while splitting line: "{token}" in:\n' f'{p}\n' f'splitted as {spl}') print(e) sys.stderr.write(f'Something goes wrong while splitting line: "{token}" in:\n' f'{p}\n' f'splitted as {spl}') sys.stderr.write(f'{e}\n') sys.exit() wid = word2id(vocabulary, word, lemma, tag, w2v) phr.append(wid) Loading Loading @@ -165,7 +165,6 @@ def main(): db.root['__ctx_types__'].append(f'name_phrs_w{args.context_window}_n{args.num_phr_per_sent}') db._p_changed = True transaction.commit() print(db.root['__ctx_types__']) db.close() Loading context_one_previous_sentece.py→context_previous_senteces.py100755 → 100644 +0 −0 File moved.File mode changed from 100755 to 100644. View file sqad2database.py +1 −1 Original line number Diff line number Diff line Loading @@ -127,7 +127,7 @@ def main(): args = parser.parse_args() # db_name = 'sqad_db/{0}_{1:%d_%m_%Y-%H:%M:%S}'.format(args.name, datetime.datetime.now()) db_name =args.name db_name = args.name db = SqadDb(db_name) rec_id_re = re.compile('(\d+)') counter = 0 Loading Loading
Makefile +10 −6 Original line number Diff line number Diff line DB_NAME=sqad_v3_$(shell date +"%d-%m-%Y_%H-%M-%S") create: ./sqad2database.py -p /nlp/projekty/sqad/sqad_v3/data -n sqad_db/$(DB_NAME) ./add_contains_answer_sentences.py -d sqad_db/$(DB_NAME) ./add_similar_senteces.py -d sqad_db/$(DB_NAME) -n 300 ./context_np.py -d sqad_db/$(DB_NAME) ./context_one_previous_sentece.py -d sqad_db/$(DB_NAME) ./query_database.py -d sqad_db/$(DB_NAME) -r 000001 --simple echo "SQAD to DB\n=======================" > $(DB_NAME).log ./sqad2database.py -p /nlp/projekty/sqad/sqad_v3/data -n $(DB_NAME) 2> $(DB_NAME).log echo "Contains answer\n======================" > $(DB_NAME).log ./add_contains_answer_sentences.py -d sqad_db/$(DB_NAME) 2>> $(DB_NAME).log echo "Similar answers\n======================" > $(DB_NAME).log ./add_similar_senteces.py -d sqad_db/$(DB_NAME) 2>> $(DB_NAME).log echo "Contex NP frases\n======================" > $(DB_NAME).log ./context_np.py -d sqad_db/$(DB_NAME) 2>> $(DB_NAME).log echo "Context previous sentece\n======================" > $(DB_NAME).log ./context_previous_senteces.py -d sqad_db/$(DB_NAME) 2>> $(DB_NAME).log No newline at end of file
add_contains_answer_sentences.py +5 −1 Original line number Diff line number Diff line Loading @@ -39,11 +39,15 @@ def main(): parser.add_argument('-d', '--db_path', type=str, required=True, help='Database path') parser.add_argument('-v', '--verbose', action='store_true', required=True, default=False, help='Verbose mode') args = parser.parse_args() db = SqadDb(args.db_path) for record, sent_containing_answer in find_sentences_containing_answer(db): print(f'{record.rec_id}: {sent_containing_answer}') if args.verbose: print(f'{record.rec_id}: {sent_containing_answer}') record.similar_answers['sents_containing_ans_ext'] = sent_containing_answer db._p_changed = True transaction.commit() Loading
context_np.py +4 −5 Original line number Diff line number Diff line Loading @@ -54,10 +54,10 @@ def name_phrases(text, title, vocabulary, context_window, num_phr_per_sent, w2v) try: word, lemma, tag = spl[:3] except ValueError as e: print(f'Something goes wrong while splitting line: "{token}" in:\n' f'{p}\n' f'splitted as {spl}') print(e) sys.stderr.write(f'Something goes wrong while splitting line: "{token}" in:\n' f'{p}\n' f'splitted as {spl}') sys.stderr.write(f'{e}\n') sys.exit() wid = word2id(vocabulary, word, lemma, tag, w2v) phr.append(wid) Loading Loading @@ -165,7 +165,6 @@ def main(): db.root['__ctx_types__'].append(f'name_phrs_w{args.context_window}_n{args.num_phr_per_sent}') db._p_changed = True transaction.commit() print(db.root['__ctx_types__']) db.close() Loading
context_one_previous_sentece.py→context_previous_senteces.py100755 → 100644 +0 −0 File moved.File mode changed from 100755 to 100644. View file
sqad2database.py +1 −1 Original line number Diff line number Diff line Loading @@ -127,7 +127,7 @@ def main(): args = parser.parse_args() # db_name = 'sqad_db/{0}_{1:%d_%m_%Y-%H:%M:%S}'.format(args.name, datetime.datetime.now()) db_name =args.name db_name = args.name db = SqadDb(db_name) rec_id_re = re.compile('(\d+)') counter = 0 Loading