Commit 78fd2953 authored by Gaëtan Caillaut
Browse files

Freeze minibert layers

parent 1dd306ac
#!/bin/bash
# Slurm batch script: runs train.py's "t1" subcommand on the "cleaned" data
# for every combination of -d / --attention / --position listed below,
# always passing --freeze-attention.
#
# The whole grid is walked once per value of E (00000, 00010, ..., 00040).
# On the first pass no checkpoint is given; on later passes each run is
# handed checkpoint-${E}.tar from its own output directory.
#SBATCH -N 1
#SBATCH -p gpu
#SBATCH --gres gpu:rtx6000:1
#SBATCH --job-name t1-frozen
#SBATCH --time 10-0
#SBATCH --mem 20G
#SBATCH -o logs/out-%j.txt
#SBATCH -e logs/err-%j.txt
#SBATCH --mail-type=ALL
#SBATCH --mail-user=gaetan.caillaut@univ-lemans.fr

eval "$(conda shell.bash hook)"
conda activate polysemy

TRAIN="data/cleaned/t1/train.csv"
DEV="data/cleaned/t1/dev.csv"
TEST="data/cleaned/t1/test.csv"
TOKENIZER="output/tokenizer.json"
PRETRAINED_DIR="models/cleaned"
OUT_DIR="models/t1/cleaned"
BS=512
DEVICE="cuda"
LOGDIR="runs/t1/cleaned"

# mkdir -p already succeeds silently when the directory exists, so no
# separate existence test is needed.
mkdir -p -- "${OUT_DIR}" "${LOGDIR}"

# Prepend the minibert checkout; the ${VAR:+...} form avoids leaving a
# trailing ':' (an empty path entry) when PYTHONPATH was not set before.
export PYTHONPATH="/lium/raid01_b/gcaillaut/polysemy/minibert${PYTHONPATH:+:${PYTHONPATH}}"

# Trace every command and abort the job on the first failing one.
set -x
set -e

# seq is left unquoted on purpose: its zero-padded output must be split
# into one word per value.
for E in $(seq -f "%05g" 0 10 40); do
  for D in 16 32 64 96 128; do
    for ATT in "self-attention" "non-transforming"; do
      for POS in "none" "fixed" "trained"; do
        # Directory of the pretrained model that is passed to train.py.
        MLM_RUN_NAME="d${D}_${ATT}_${POS}_gelu_norm"
        # Used for the TensorBoard directory and the checkpoint location.
        # NOTE(review): presumably has to match the run name train.py
        # derives from these flags - confirm.
        T1_RUN_NAME="d${D}_${ATT}_${POS}_norm_frozen"
        TB_DIR="${LOGDIR}/${T1_RUN_NAME}"

        # Build the command line once so both cases share the same flags.
        # NOTE(review): TEST is passed before DEV - confirm this is the
        # positional order train.py expects.
        TRAIN_ARGS=(
          t1 "${TRAIN}" "${TEST}" "${DEV}" "${TOKENIZER}"
          "${PRETRAINED_DIR}/${MLM_RUN_NAME}/minibert-model.pt"
          -o "${OUT_DIR}"
          -d "${D}"
          --attention "${ATT}"
          --position "${POS}"
          --epochs 10
          --bs "${BS}"
          --device "${DEVICE}"
          --logdir "${TB_DIR}"
        )

        # 10# forces base 10 so the zero-padded value is not read as octal.
        if ((10#$E > 0)); then
          # Presumably written by the previous pass - confirm in train.py.
          CHECKPOINT="${OUT_DIR}/${T1_RUN_NAME}/checkpoint-${E}.tar"
          TRAIN_ARGS+=(--checkpoint "${CHECKPOINT}")
        fi
        TRAIN_ARGS+=(--freeze-attention)

        python train.py "${TRAIN_ARGS[@]}"
      done
    done
  done
done
\ No newline at end of file
#!/bin/bash
# Slurm batch script: runs train.py's "t1" subcommand on the "lemmatized"
# data for every combination of -d / --attention / --position listed below,
# always passing --freeze-attention.
#
# The whole grid is walked once per value of E (00000, 00010, ..., 00040).
# On the first pass no checkpoint is given; on later passes each run is
# handed checkpoint-${E}.tar from its own output directory.
#SBATCH -N 1
#SBATCH -p gpu
#SBATCH --gres gpu:rtx6000:1
#SBATCH --job-name t1-lemmatized-frozen
#SBATCH --time 10-0
#SBATCH --mem 20G
#SBATCH -o logs/out-%j.txt
#SBATCH -e logs/err-%j.txt
#SBATCH --mail-type=ALL
#SBATCH --mail-user=gaetan.caillaut@univ-lemans.fr

eval "$(conda shell.bash hook)"
conda activate polysemy

TRAIN="data/lemmatized/t1/train.csv"
DEV="data/lemmatized/t1/dev.csv"
TEST="data/lemmatized/t1/test.csv"
TOKENIZER="output/tokenizer_lemmatized.json"
PRETRAINED_DIR="models/lemmatized"
OUT_DIR="models/t1/lemmatized"
BS=512
DEVICE="cuda"
LOGDIR="runs/t1/lemmatized"

# mkdir -p already succeeds silently when the directory exists, so no
# separate existence test is needed.
mkdir -p -- "${OUT_DIR}" "${LOGDIR}"

# Prepend the minibert checkout; the ${VAR:+...} form avoids leaving a
# trailing ':' (an empty path entry) when PYTHONPATH was not set before.
export PYTHONPATH="/lium/raid01_b/gcaillaut/polysemy/minibert${PYTHONPATH:+:${PYTHONPATH}}"

# Trace every command and abort the job on the first failing one.
set -x
set -e

# seq is left unquoted on purpose: its zero-padded output must be split
# into one word per value.
for E in $(seq -f "%05g" 0 10 40); do
  for D in 16 32 64 96 128; do
    for ATT in "self-attention" "non-transforming"; do
      for POS in "none" "fixed" "trained"; do
        # Directory of the pretrained model that is passed to train.py.
        MLM_RUN_NAME="d${D}_${ATT}_${POS}_gelu_norm"
        # Used for the TensorBoard directory and the checkpoint location.
        # NOTE(review): presumably has to match the run name train.py
        # derives from these flags - confirm.
        T1_RUN_NAME="d${D}_${ATT}_${POS}_norm_frozen"
        TB_DIR="${LOGDIR}/${T1_RUN_NAME}"

        # Build the command line once so both cases share the same flags.
        # NOTE(review): TEST is passed before DEV - confirm this is the
        # positional order train.py expects.
        TRAIN_ARGS=(
          t1 "${TRAIN}" "${TEST}" "${DEV}" "${TOKENIZER}"
          "${PRETRAINED_DIR}/${MLM_RUN_NAME}/minibert-model.pt"
          -o "${OUT_DIR}"
          -d "${D}"
          --attention "${ATT}"
          --position "${POS}"
          --epochs 10
          --bs "${BS}"
          --device "${DEVICE}"
          --logdir "${TB_DIR}"
        )

        # 10# forces base 10 so the zero-padded value is not read as octal.
        if ((10#$E > 0)); then
          # Presumably written by the previous pass - confirm in train.py.
          CHECKPOINT="${OUT_DIR}/${T1_RUN_NAME}/checkpoint-${E}.tar"
          TRAIN_ARGS+=(--checkpoint "${CHECKPOINT}")
        fi
        TRAIN_ARGS+=(--freeze-attention)

        python train.py "${TRAIN_ARGS[@]}"
      done
    done
  done
done
\ No newline at end of file
#!/bin/bash
# Slurm batch script: runs train.py's "t2" subcommand on the "cleaned" data
# for every combination of -d / --attention / --position listed below,
# always passing --freeze-attention.
#
# The whole grid is walked once per value of E (00000, 00010, ..., 00040).
# On the first pass no checkpoint is given; on later passes each run is
# handed checkpoint-${E}.tar from its own output directory.
#SBATCH -N 1
#SBATCH -p gpu
#SBATCH --gres gpu:rtx6000:1
#SBATCH --job-name t2-frozen
#SBATCH --time 10-0
#SBATCH --mem 20G
#SBATCH -o logs/out-%j.txt
#SBATCH -e logs/err-%j.txt
#SBATCH --mail-type=ALL
#SBATCH --mail-user=gaetan.caillaut@univ-lemans.fr

eval "$(conda shell.bash hook)"
conda activate polysemy

TRAIN="data/cleaned/t2/train.csv"
DEV="data/cleaned/t2/dev.csv"
TEST="data/cleaned/t2/test.csv"
TOKENIZER="output/tokenizer.json"
PRETRAINED_DIR="models/cleaned"
OUT_DIR="models/t2/cleaned"
BS=512
DEVICE="cuda"
LOGDIR="runs/t2/cleaned"

# mkdir -p already succeeds silently when the directory exists, so no
# separate existence test is needed.
mkdir -p -- "${OUT_DIR}" "${LOGDIR}"

# Prepend the minibert checkout; the ${VAR:+...} form avoids leaving a
# trailing ':' (an empty path entry) when PYTHONPATH was not set before.
export PYTHONPATH="/lium/raid01_b/gcaillaut/polysemy/minibert${PYTHONPATH:+:${PYTHONPATH}}"

# Trace every command and abort the job on the first failing one.
set -x
set -e

# seq is left unquoted on purpose: its zero-padded output must be split
# into one word per value.
for E in $(seq -f "%05g" 0 10 40); do
  for D in 16 32 64 96 128; do
    for ATT in "self-attention" "non-transforming"; do
      for POS in "none" "fixed" "trained"; do
        # Directory of the pretrained model that is passed to train.py.
        MLM_RUN_NAME="d${D}_${ATT}_${POS}_gelu_norm"
        # Used for the TensorBoard directory and the checkpoint location.
        # NOTE(review): presumably has to match the run name train.py
        # derives from these flags - confirm.
        T2_RUN_NAME="d${D}_${ATT}_${POS}_norm_frozen"
        TB_DIR="${LOGDIR}/${T2_RUN_NAME}"

        # Build the command line once so both cases share the same flags.
        # NOTE(review): TEST is passed before DEV - confirm this is the
        # positional order train.py expects.
        TRAIN_ARGS=(
          t2 "${TRAIN}" "${TEST}" "${DEV}" "${TOKENIZER}"
          "${PRETRAINED_DIR}/${MLM_RUN_NAME}/minibert-model.pt"
          -o "${OUT_DIR}"
          -d "${D}"
          --attention "${ATT}"
          --position "${POS}"
          --epochs 10
          --bs "${BS}"
          --device "${DEVICE}"
          --logdir "${TB_DIR}"
        )

        # 10# forces base 10 so the zero-padded value is not read as octal.
        if ((10#$E > 0)); then
          # Presumably written by the previous pass - confirm in train.py.
          CHECKPOINT="${OUT_DIR}/${T2_RUN_NAME}/checkpoint-${E}.tar"
          TRAIN_ARGS+=(--checkpoint "${CHECKPOINT}")
        fi
        TRAIN_ARGS+=(--freeze-attention)

        python train.py "${TRAIN_ARGS[@]}"
      done
    done
  done
done
\ No newline at end of file
#!/bin/bash
# Slurm batch script: runs train.py's "t2" subcommand on the "lemmatized"
# data for every combination of -d / --attention / --position listed below,
# always passing --freeze-attention.
#
# The whole grid is walked once per value of E (00000, 00010, ..., 00040).
# On the first pass no checkpoint is given; on later passes each run is
# handed checkpoint-${E}.tar from its own output directory.
#SBATCH -N 1
#SBATCH -p gpu
#SBATCH --gres gpu:rtx6000:1
#SBATCH --job-name t2-lemmatized-frozen
#SBATCH --time 10-0
#SBATCH --mem 20G
#SBATCH -o logs/out-%j.txt
#SBATCH -e logs/err-%j.txt
#SBATCH --mail-type=ALL
#SBATCH --mail-user=gaetan.caillaut@univ-lemans.fr

eval "$(conda shell.bash hook)"
conda activate polysemy

TRAIN="data/lemmatized/t2/train.csv"
DEV="data/lemmatized/t2/dev.csv"
TEST="data/lemmatized/t2/test.csv"
TOKENIZER="output/tokenizer_lemmatized.json"
PRETRAINED_DIR="models/lemmatized"
OUT_DIR="models/t2/lemmatized"
BS=512
DEVICE="cuda"
LOGDIR="runs/t2/lemmatized"

# mkdir -p already succeeds silently when the directory exists, so no
# separate existence test is needed.
mkdir -p -- "${OUT_DIR}" "${LOGDIR}"

# Prepend the minibert checkout; the ${VAR:+...} form avoids leaving a
# trailing ':' (an empty path entry) when PYTHONPATH was not set before.
export PYTHONPATH="/lium/raid01_b/gcaillaut/polysemy/minibert${PYTHONPATH:+:${PYTHONPATH}}"

# Trace every command and abort the job on the first failing one.
set -x
set -e

# seq is left unquoted on purpose: its zero-padded output must be split
# into one word per value.
for E in $(seq -f "%05g" 0 10 40); do
  for D in 16 32 64 96 128; do
    for ATT in "self-attention" "non-transforming"; do
      for POS in "none" "fixed" "trained"; do
        # Directory of the pretrained model that is passed to train.py.
        MLM_RUN_NAME="d${D}_${ATT}_${POS}_gelu_norm"
        # Used for the TensorBoard directory and the checkpoint location.
        # NOTE(review): presumably has to match the run name train.py
        # derives from these flags - confirm.
        T2_RUN_NAME="d${D}_${ATT}_${POS}_norm_frozen"
        TB_DIR="${LOGDIR}/${T2_RUN_NAME}"

        # Build the command line once so both cases share the same flags.
        # NOTE(review): TEST is passed before DEV - confirm this is the
        # positional order train.py expects.
        TRAIN_ARGS=(
          t2 "${TRAIN}" "${TEST}" "${DEV}" "${TOKENIZER}"
          "${PRETRAINED_DIR}/${MLM_RUN_NAME}/minibert-model.pt"
          -o "${OUT_DIR}"
          -d "${D}"
          --attention "${ATT}"
          --position "${POS}"
          --epochs 10
          --bs "${BS}"
          --device "${DEVICE}"
          --logdir "${TB_DIR}"
        )

        # 10# forces base 10 so the zero-padded value is not read as octal.
        if ((10#$E > 0)); then
          # Presumably written by the previous pass - confirm in train.py.
          CHECKPOINT="${OUT_DIR}/${T2_RUN_NAME}/checkpoint-${E}.tar"
          TRAIN_ARGS+=(--checkpoint "${CHECKPOINT}")
        fi
        TRAIN_ARGS+=(--freeze-attention)

        python train.py "${TRAIN_ARGS[@]}"
      done
    done
  done
done
\ No newline at end of file
......@@ -39,22 +39,30 @@ def run_name_from_params(args):
def t1_run_name_from_params(args):
return "_".join([
s = "_".join([
f"d{args.d}",
args.attention.strip().lower(),
args.position.strip().lower(),
"nonorm" if args.dont_normalize else "norm"
])
if args.freeze_attention:
s = f"{s}_frozen"
return s
def t2_run_name_from_params(args):
return "_".join([
s = "_".join([
f"d{args.d}",
args.attention.strip().lower(),
args.position.strip().lower(),
"nonorm" if args.dont_normalize else "norm"
])
if args.freeze_attention:
s = f"{s}_frozen"
return s
def mlm_model_from_params(d, attention, position, tokenizer, max_seq_size, mask_token, pad_token, activation, device, checkpoint_path=None):
if checkpoint_path is None:
......@@ -369,6 +377,9 @@ def finetune_t1(args):
args.model, args.d, attention_type, position_type, tokenizer, max_seq_size, mask_token, pad_token, device, checkpoint_path=args.checkpoint)
run_name = t1_run_name_from_params(args)
if args.freeze_attention:
model.minibert.freeze()
if args.logdir is None:
writer = SummaryWriter(log_dir=f"runs/t1/{run_name}")
else:
......@@ -661,6 +672,9 @@ def finetune_t2(args):
args.model, args.d, attention_type, position_type, tokenizer, max_seq_size, mask_token, pad_token, device, checkpoint_path=args.checkpoint)
run_name = t2_run_name_from_params(args)
if args.freeze_attention:
model.minibert.freeze()
if args.logdir is None:
writer = SummaryWriter(log_dir=f"runs/t2/{run_name}")
else:
......@@ -1268,6 +1282,7 @@ if __name__ == "__main__":
t1_parser.add_argument("--epochs-between-save", default=10, type=int)
t1_parser.add_argument("--show-progress", default=50, type=int)
t1_parser.add_argument("--sample", action="store_true")
t1_parser.add_argument("--freeze-attention", action="store_true")
t1_parser.set_defaults(func=finetune_t1)
t1fs_parser = subparsers.add_parser("t1-fs")
......@@ -1309,6 +1324,7 @@ if __name__ == "__main__":
t2_parser.add_argument("--attention", type=str, default="self-attention")
t2_parser.add_argument("--position", type=str, default="fixed")
t2_parser.add_argument("--dont-normalize", action="store_true")
t2_parser.add_argument("--freeze-attention", action="store_true")
t2_parser.add_argument("--activation", type=str, default="gelu")
t2_parser.add_argument("--device", type=str, default="cpu")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment