Commit dfaf429b authored by Gaëtan Caillaut's avatar Gaëtan Caillaut
Browse files

Camembert avec couches cachées gelées

parent 7b94ad7d
#!/bin/bash
#SBATCH -N 1
#SBATCH -p gpu
#SBATCH --gres gpu:rtx6000:1
#SBATCH --job-name camembert-t1-frozen
#SBATCH --time 10-0
#SBATCH --mem 20G
#SBATCH -o logs/out-%j.txt
#SBATCH -e logs/err-%j.txt
#SBATCH --mail-type=ALL
#SBATCH --mail-user=gaetan.caillaut@univ-lemans.fr

# Slurm batch job: fine-tune CamemBERT on task t1 through the `camembert-t1`
# sub-command of train.py, with --freeze enabled.
#
# All paths below are relative, so the job has to be submitted from the
# directory that contains train.py and the data/ tree.
# NOTE(review): the logs/ directory used by the #SBATCH -o/-e directives is not
# created by this script; presumably it must exist before submission - confirm.

# Make `conda activate` available in this non-interactive shell.
eval "$(conda shell.bash hook)"
conda activate polysemy

# Dataset splits.
TRAIN="data/cleaned/t1/train.csv"
DEV="data/cleaned/t1/dev.csv"
TEST="data/cleaned/t1/test.csv"

# Where checkpoints and training logs are written.
OUT_DIR="models/t1/cleaned/camembert-frozen"
LOGDIR="runs/t1/cleaned/camembert-frozen"

BS=200
DEVICE="cuda"

# From here on, abort on the first failing command: a failed directory
# creation must stop the job instead of letting training start regardless.
set -e

# `mkdir -p` is a no-op for directories that already exist, so no explicit
# existence test is needed.
mkdir -p -- "${OUT_DIR}" "${LOGDIR}"

export PYTHONPATH="/lium/raid01_b/gcaillaut/polysemy/minibert:${PYTHONPATH}"

# Trace the exact training command into the job's stderr log.
set -x

# Positional order expected by train.py is: train, test, dev.
python train.py camembert-t1 "${TRAIN}" "${TEST}" "${DEV}" \
  --outdir "${OUT_DIR}" \
  --bs "${BS}" \
  -e 10 \
  --epochs-between-save 1 \
  --logdir "${LOGDIR}" \
  --device "${DEVICE}" \
  --freeze
#!/bin/bash
#SBATCH -N 1
#SBATCH -p gpu
#SBATCH --gres gpu:rtx6000:1
#SBATCH --job-name camembert-t2-frozen
#SBATCH --time 10-0
#SBATCH --mem 20G
#SBATCH -o logs/out-%j.txt
#SBATCH -e logs/err-%j.txt
#SBATCH --mail-type=ALL
#SBATCH --mail-user=gaetan.caillaut@univ-lemans.fr

# Slurm batch job: fine-tune CamemBERT on task t2 through the `camembert-t2`
# sub-command of train.py, with --freeze enabled.
#
# All paths below are relative, so the job has to be submitted from the
# directory that contains train.py and the data/ tree.
# NOTE(review): the logs/ directory used by the #SBATCH -o/-e directives is not
# created by this script; presumably it must exist before submission - confirm.

# Make `conda activate` available in this non-interactive shell.
eval "$(conda shell.bash hook)"
conda activate polysemy

# Dataset splits.
TRAIN="data/cleaned/t2/train.csv"
DEV="data/cleaned/t2/dev.csv"
TEST="data/cleaned/t2/test.csv"

# Where checkpoints and training logs are written.
OUT_DIR="models/t2/cleaned/camembert-frozen"
LOGDIR="runs/t2/cleaned/camembert-frozen"

BS=200
DEVICE="cuda"

# From here on, abort on the first failing command: a failed directory
# creation must stop the job instead of letting training start regardless.
set -e

# `mkdir -p` is a no-op for directories that already exist, so no explicit
# existence test is needed.
mkdir -p -- "${OUT_DIR}" "${LOGDIR}"

export PYTHONPATH="/lium/raid01_b/gcaillaut/polysemy/minibert:${PYTHONPATH}"

# Trace the exact training command into the job's stderr log.
set -x

# Positional order expected by train.py is: train, test, dev.
# --freeze is required here: the job name and the output/log directories all
# label this run as "frozen", so the flag must actually be passed for the
# results stored there to match that label.
python train.py camembert-t2 "${TRAIN}" "${TEST}" "${DEV}" \
  --outdir "${OUT_DIR}" \
  --bs "${BS}" \
  -e 10 \
  --epochs-between-save 1 \
  --logdir "${LOGDIR}" \
  --device "${DEVICE}" \
  --freeze
......@@ -23,7 +23,8 @@ class MyCamembertForSequenceClassification(torch.nn.Module):
self.l2 = torch.nn.Linear(768//2, num_labels, bias=True)
def forward(self, input, attention_mask=None):
outputs = self.camembert(input_ids=input, attention_mask=attention_mask)
outputs = self.camembert(
input_ids=input, attention_mask=attention_mask)
x = outputs.last_hidden_state
# Average tokens for sentence classification
......@@ -1033,6 +1034,10 @@ def finetune_t1_camembert(args):
optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
prev_epoch = checkpoint["epoch"]
if args.freeze:
for param in model.roberta.roberta.parameters():
param.requires_grad = False
model.train()
deft_collater = DEFT2018CollaterForCamembert(
......@@ -1178,6 +1183,10 @@ def finetune_t1_camembert_v2(args):
optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
prev_epoch = checkpoint["epoch"]
if args.freeze:
for param in model.camembert.parameters():
param.requires_grad = False
model.train()
deft_collater = DEFT2018CollaterForCamembert(
......@@ -1330,6 +1339,10 @@ def finetune_t2_camembert(args):
optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
prev_epoch = checkpoint["epoch"]
if args.freeze:
for param in model.roberta.roberta.parameters():
param.requires_grad = False
model.train()
deft_collater = DEFT2018CollaterForCamembert(
......@@ -1476,6 +1489,10 @@ def finetune_t2_camembert_v2(args):
optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
prev_epoch = checkpoint["epoch"]
if args.freeze:
for param in model.camembert.parameters():
param.requires_grad = False
model.train()
deft_collater = DEFT2018CollaterForCamembert(
......@@ -1767,6 +1784,7 @@ if __name__ == "__main__":
cam_t1_parser.add_argument("--epochs-between-save", default=10, type=int)
cam_t1_parser.add_argument("--show-progress", default=50, type=int)
cam_t1_parser.add_argument("--sample", action="store_true")
cam_t1_parser.add_argument("--freeze", action="store_true")
cam_t1_parser.set_defaults(func=finetune_t1_camembert)
cam_t2_parser = subparsers.add_parser("camembert-t2")
......@@ -1782,37 +1800,44 @@ if __name__ == "__main__":
cam_t2_parser.add_argument("--epochs-between-save", default=10, type=int)
cam_t2_parser.add_argument("--show-progress", default=50, type=int)
cam_t2_parser.add_argument("--sample", action="store_true")
cam_t2_parser.set_defaults(func=finetune_t2_camembert_v2)
cam_t1_parser = subparsers.add_parser("camembert-t1-v2")
cam_t1_parser.add_argument("train")
cam_t1_parser.add_argument("test")
cam_t1_parser.add_argument("dev")
cam_t1_parser.add_argument("-o", "--outdir", type=str)
cam_t1_parser.add_argument("--bs", type=int, default=128)
cam_t1_parser.add_argument("-e", "--epochs", type=int, default=100)
cam_t1_parser.add_argument("--device", type=str, default="cpu")
cam_t1_parser.add_argument("--logdir", type=str, required=True)
cam_t1_parser.add_argument("-c", "--checkpoint", type=str, required=False)
cam_t1_parser.add_argument("--epochs-between-save", default=10, type=int)
cam_t1_parser.add_argument("--show-progress", default=50, type=int)
cam_t1_parser.add_argument("--sample", action="store_true")
cam_t1_parser.set_defaults(func=finetune_t1_camembert_v2)
cam_t2_parser = subparsers.add_parser("camembert-t2-v2")
cam_t2_parser.add_argument("train")
cam_t2_parser.add_argument("test")
cam_t2_parser.add_argument("dev")
cam_t2_parser.add_argument("-o", "--outdir", type=str)
cam_t2_parser.add_argument("--bs", type=int, default=128)
cam_t2_parser.add_argument("-e", "--epochs", type=int, default=100)
cam_t2_parser.add_argument("--device", type=str, default="cpu")
cam_t2_parser.add_argument("--logdir", type=str, required=True)
cam_t2_parser.add_argument("-c", "--checkpoint", type=str, required=False)
cam_t2_parser.add_argument("--epochs-between-save", default=10, type=int)
cam_t2_parser.add_argument("--show-progress", default=50, type=int)
cam_t2_parser.add_argument("--sample", action="store_true")
cam_t2_parser.set_defaults(func=finetune_t2_camembert_v2)
cam_t2_parser.add_argument("--freeze", action="store_true")
cam_t2_parser.set_defaults(func=finetune_t2_camembert)
cam_t1_v2_parser = subparsers.add_parser("camembert-t1-v2")
cam_t1_v2_parser.add_argument("train")
cam_t1_v2_parser.add_argument("test")
cam_t1_v2_parser.add_argument("dev")
cam_t1_v2_parser.add_argument("-o", "--outdir", type=str)
cam_t1_v2_parser.add_argument("--bs", type=int, default=128)
cam_t1_v2_parser.add_argument("-e", "--epochs", type=int, default=100)
cam_t1_v2_parser.add_argument("--device", type=str, default="cpu")
cam_t1_v2_parser.add_argument("--logdir", type=str, required=True)
cam_t1_v2_parser.add_argument(
"-c", "--checkpoint", type=str, required=False)
cam_t1_v2_parser.add_argument(
"--epochs-between-save", default=10, type=int)
cam_t1_v2_parser.add_argument("--show-progress", default=50, type=int)
cam_t1_v2_parser.add_argument("--sample", action="store_true")
cam_t1_v2_parser.add_argument("--freeze", action="store_true")
cam_t1_v2_parser.set_defaults(func=finetune_t1_camembert_v2)
cam_t2_v2_parser = subparsers.add_parser("camembert-t2-v2")
cam_t2_v2_parser.add_argument("train")
cam_t2_v2_parser.add_argument("test")
cam_t2_v2_parser.add_argument("dev")
cam_t2_v2_parser.add_argument("-o", "--outdir", type=str)
cam_t2_v2_parser.add_argument("--bs", type=int, default=128)
cam_t2_v2_parser.add_argument("-e", "--epochs", type=int, default=100)
cam_t2_v2_parser.add_argument("--device", type=str, default="cpu")
cam_t2_v2_parser.add_argument("--logdir", type=str, required=True)
cam_t2_v2_parser.add_argument(
"-c", "--checkpoint", type=str, required=False)
cam_t2_v2_parser.add_argument(
"--epochs-between-save", default=10, type=int)
cam_t2_v2_parser.add_argument("--show-progress", default=50, type=int)
cam_t2_v2_parser.add_argument("--sample", action="store_true")
cam_t2_v2_parser.add_argument("--freeze", action="store_true")
cam_t2_v2_parser.set_defaults(func=finetune_t2_camembert_v2)
args = parser.parse_args()
args.func(args)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment