Commit e560b18f authored by Vít Novotný
Browse files

Evaluate three different schedules in `03_train_ner_models.sh`

parent 0b717e6d
Pipeline #147620 passed with stage
in 11 minutes and 39 seconds
......@@ -27,7 +27,8 @@ if __name__ == '__main__':
validation_tagged_sentence_basename = f'{tagged_sentence_basename}_validation'
try:
NerModel.load(model_basename)
model = NerModel.load(model_basename)
model.model # Try actually loading the NER model
except EnvironmentError:
NerModel.train_and_save(model_checkpoint_basename, model_basename,
training_sentence_basename, validation_sentence_basename,
......
......@@ -8,14 +8,13 @@ IMAGE_NAME=ahisto/named-entity-search:latest
ROOT_PATH=/nlp/projekty/ahisto/public_html/named-entity-search/results/
ANNOTATION_PATH=/nlp/projekty/ahisto/annotations/
OCR_EVAL_PATH=/nlp/projekty/ahisto/ahisto-ocr-eval
SCHEDULE_NAME=fine-tuning
DOCKER_BUILDKIT=1 docker build --build-arg UID="$(id -u)" --build-arg GID="$(id -g)" --build-arg UNAME="$(id -u -n)" . -f scripts/03_train_ner_models.Dockerfile -t "$IMAGE_NAME"
parallel --halt=soon,fail=100% --jobs=4 --bar --delay 120 --resume-failed \
--joblog scripts/03_train_ner_models_"${SCHEDULE_NAME}".joblog \
parallel --halt=soon,fail=100% --jobs=12 --bar --delay 120 --resume-failed \
--joblog scripts/03_train_ner_models.joblog \
--colsep ' +' \
-- \
'GPU_ID=$(nvidia-smi | grep -B 1 -- " [0-9]MiB / 15360MiB" | tail -n 2 | head -n 1 | awk "{ print \$2 }"); '\
'docker run --rm -u "$(id -u):$(id -g)" --hostname "'"$HOSTNAME"'" --runtime=nvidia -e CUDA_DEVICE_ORDER=PCI_BUS_ID -e NVIDIA_VISIBLE_DEVICES="$GPU_ID" -e TOKENIZERS_PARALLELISM=false -e COMET_API_KEY -v "$PWD"/..:/workdir:rw -w /workdir/"${PWD##*/}" -v "'"$ROOT_PATH"'":"'"$ROOT_PATH"'":rw -v "'"$ANNOTATION_PATH"'":"'"$ANNOTATION_PATH"'":ro -v "'"$OCR_EVAL_PATH"'":"'"$OCR_EVAL_PATH"'":ro "'"$IMAGE_NAME"'" nice -n 19 python scripts/03_train_ner_models.py {1} {2} {3} '"$SCHEDULE_NAME" \
'docker run --rm -u "$(id -u):$(id -g)" --hostname "'"$HOSTNAME"'" --runtime=nvidia -e CUDA_DEVICE_ORDER=PCI_BUS_ID -e NVIDIA_VISIBLE_DEVICES="$GPU_ID" -e TOKENIZERS_PARALLELISM=false -e COMET_API_KEY -v "$PWD"/..:/workdir:rw -w /workdir/"${PWD##*/}" -v "'"$ROOT_PATH"'":"'"$ROOT_PATH"'":rw -v "'"$ANNOTATION_PATH"'":"'"$ANNOTATION_PATH"'":ro -v "'"$OCR_EVAL_PATH"'":"'"$OCR_EVAL_PATH"'":ro "'"$IMAGE_NAME"'" nice -n 19 python scripts/03_train_ner_models.py {1} {2} {3} {4} ' \
:::: scripts/03_train_ner_models.tasks
manatee non-crossing only-relevant
manatee all only-relevant
fuzzy-regex non-crossing only-relevant
fuzzy-regex all only-relevant
manatee non-crossing all
manatee all all
fuzzy-regex non-crossing all
fuzzy-regex all all
manatee non-crossing only-relevant fair-sequential
manatee non-crossing only-relevant fine-tuning
manatee non-crossing only-relevant parallel
manatee all only-relevant fair-sequential
manatee all only-relevant fine-tuning
manatee all only-relevant parallel
fuzzy-regex non-crossing only-relevant fair-sequential
fuzzy-regex non-crossing only-relevant fine-tuning
fuzzy-regex non-crossing only-relevant parallel
fuzzy-regex all only-relevant fair-sequential
fuzzy-regex all only-relevant fine-tuning
fuzzy-regex all only-relevant parallel
manatee non-crossing all fair-sequential
manatee non-crossing all fine-tuning
manatee non-crossing all parallel
manatee all all fair-sequential
manatee all all fine-tuning
manatee all all parallel
fuzzy-regex non-crossing all fair-sequential
fuzzy-regex non-crossing all fine-tuning
fuzzy-regex non-crossing all parallel
fuzzy-regex all all fair-sequential
fuzzy-regex all all fine-tuning
fuzzy-regex all all parallel
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment