Gaëtan Caillaut / minibert-deft2018 · Commits

Commit 78fd2953, authored Mar 29, 2021 by Gaëtan Caillaut

Freeze minibert layers

Parent: 1dd306ac
Changes: 5 files
slurm_scripts/job_t1_frozen.sh (new file, mode 100755)
#!/bin/bash
#SBATCH -N 1
#SBATCH -p gpu
#SBATCH --gres gpu:rtx6000:1
#SBATCH --job-name t1-frozen
#SBATCH --time 10-0
#SBATCH --mem 20G
#SBATCH -o logs/out-%j.txt
#SBATCH -e logs/err-%j.txt
#SBATCH --mail-type=ALL
#SBATCH --mail-user=gaetan.caillaut@univ-lemans.fr

eval "$(conda shell.bash hook)"
conda activate polysemy

TRAIN="data/cleaned/t1/train.csv"
DEV="data/cleaned/t1/dev.csv"
TEST="data/cleaned/t1/test.csv"
TOKENIZER="output/tokenizer.json"
PRETRAINED_DIR="models/cleaned"
OUT_DIR="models/t1/cleaned"
BS=512
DEVICE="cuda"
LOGDIR="runs/t1/cleaned"

for d in ${OUT_DIR} ${LOGDIR}; do
    if [ ! -d ${d} ]; then
        mkdir -p ${d}
    fi
done

export PYTHONPATH="/lium/raid01_b/gcaillaut/polysemy/minibert:${PYTHONPATH}"

set -x
set -e

for E in $(seq -f "%05g" 0 10 40); do
    for D in 16 32 64 96 128; do
        for ATT in "self-attention" "non-transforming"; do
            for POS in "none" "fixed" "trained"; do
                MLM_RUN_NAME="d${D}_${ATT}_${POS}_gelu_norm"
                T1_RUN_NAME="d${D}_${ATT}_${POS}_norm_frozen"
                TB_DIR="${LOGDIR}/${T1_RUN_NAME}"

                if (( 10#$E > 0 )); then
                    CHECKPOINT="${OUT_DIR}/${T1_RUN_NAME}/checkpoint-${E}.tar"
                    python train.py t1 ${TRAIN} ${TEST} ${DEV} ${TOKENIZER} \
                        "${PRETRAINED_DIR}/${MLM_RUN_NAME}/minibert-model.pt" \
                        -o ${OUT_DIR} -d ${D} --attention ${ATT} --position ${POS} \
                        --epochs 10 --bs ${BS} --device ${DEVICE} --logdir ${TB_DIR} \
                        --checkpoint ${CHECKPOINT} --freeze-attention
                else
                    python train.py t1 ${TRAIN} ${TEST} ${DEV} ${TOKENIZER} \
                        "${PRETRAINED_DIR}/${MLM_RUN_NAME}/minibert-model.pt" \
                        -o ${OUT_DIR} -d ${D} --attention ${ATT} --position ${POS} \
                        --epochs 10 --bs ${BS} --device ${DEVICE} --logdir ${TB_DIR} \
                        --freeze-attention
                fi
            done
        done
    done
done
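The four new job scripts in this commit share this structure and differ only in the task (t1 or t2) and the data variant (cleaned or lemmatized). Each one sweeps 30 configurations (5 hidden sizes × 2 attention types × 3 position-encoding modes) and trains every configuration in five stages of 10 epochs: when the zero-padded stage counter E is greater than zero (the 10#$E prefix forces base-10 arithmetic on values such as 00010, which would otherwise be parsed as octal), training resumes from checkpoint-${E}.tar. As an illustration only, not part of the repository, the following Python sketch reproduces the run-name and checkpoint-path convention the script relies on:

from itertools import product

# Values copied from job_t1_frozen.sh
PRETRAINED_DIR = "models/cleaned"
OUT_DIR = "models/t1/cleaned"
DIMS = [16, 32, 64, 96, 128]
ATTENTIONS = ["self-attention", "non-transforming"]
POSITIONS = ["none", "fixed", "trained"]
EPOCH_STAGES = [f"{e:05d}" for e in range(0, 50, 10)]  # same as: seq -f "%05g" 0 10 40

for e, d, att, pos in product(EPOCH_STAGES, DIMS, ATTENTIONS, POSITIONS):
    mlm_run = f"d{d}_{att}_{pos}_gelu_norm"    # name of the pre-trained MLM run
    t1_run = f"d{d}_{att}_{pos}_norm_frozen"   # fine-tuning run name (cf. t1_run_name_from_params)
    pretrained = f"{PRETRAINED_DIR}/{mlm_run}/minibert-model.pt"
    if int(e) > 0:                             # same test as (( 10#$E > 0 )) in the script
        print(f"{t1_run}: resume from {OUT_DIR}/{t1_run}/checkpoint-{e}.tar")
    else:
        print(f"{t1_run}: start from pre-trained weights {pretrained}")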
slurm_scripts/job_t1_lemmatized_frozen.sh (new file, mode 100755)
#!/bin/bash
#SBATCH -N 1
#SBATCH -p gpu
#SBATCH --gres gpu:rtx6000:1
#SBATCH --job-name t1-lemmatized-frozen
#SBATCH --time 10-0
#SBATCH --mem 20G
#SBATCH -o logs/out-%j.txt
#SBATCH -e logs/err-%j.txt
#SBATCH --mail-type=ALL
#SBATCH --mail-user=gaetan.caillaut@univ-lemans.fr

eval "$(conda shell.bash hook)"
conda activate polysemy

TRAIN="data/lemmatized/t1/train.csv"
DEV="data/lemmatized/t1/dev.csv"
TEST="data/lemmatized/t1/test.csv"
TOKENIZER="output/tokenizer_lemmatized.json"
PRETRAINED_DIR="models/lemmatized"
OUT_DIR="models/t1/lemmatized"
BS=512
DEVICE="cuda"
LOGDIR="runs/t1/lemmatized"

for d in ${OUT_DIR} ${LOGDIR}; do
    if [ ! -d ${d} ]; then
        mkdir -p ${d}
    fi
done

export PYTHONPATH="/lium/raid01_b/gcaillaut/polysemy/minibert:${PYTHONPATH}"

set -x
set -e

for E in $(seq -f "%05g" 0 10 40); do
    for D in 16 32 64 96 128; do
        for ATT in "self-attention" "non-transforming"; do
            for POS in "none" "fixed" "trained"; do
                MLM_RUN_NAME="d${D}_${ATT}_${POS}_gelu_norm"
                T1_RUN_NAME="d${D}_${ATT}_${POS}_norm_frozen"
                TB_DIR="${LOGDIR}/${T1_RUN_NAME}"

                if (( 10#$E > 0 )); then
                    CHECKPOINT="${OUT_DIR}/${T1_RUN_NAME}/checkpoint-${E}.tar"
                    python train.py t1 ${TRAIN} ${TEST} ${DEV} ${TOKENIZER} \
                        "${PRETRAINED_DIR}/${MLM_RUN_NAME}/minibert-model.pt" \
                        -o ${OUT_DIR} -d ${D} --attention ${ATT} --position ${POS} \
                        --epochs 10 --bs ${BS} --device ${DEVICE} --logdir ${TB_DIR} \
                        --checkpoint ${CHECKPOINT} --freeze-attention
                else
                    python train.py t1 ${TRAIN} ${TEST} ${DEV} ${TOKENIZER} \
                        "${PRETRAINED_DIR}/${MLM_RUN_NAME}/minibert-model.pt" \
                        -o ${OUT_DIR} -d ${D} --attention ${ATT} --position ${POS} \
                        --epochs 10 --bs ${BS} --device ${DEVICE} --logdir ${TB_DIR} \
                        --freeze-attention
                fi
            done
        done
    done
done
slurm_scripts/job_t2_frozen.sh (new file, mode 100755)
#!/bin/bash
#SBATCH -N 1
#SBATCH -p gpu
#SBATCH --gres gpu:rtx6000:1
#SBATCH --job-name t2-frozen
#SBATCH --time 10-0
#SBATCH --mem 20G
#SBATCH -o logs/out-%j.txt
#SBATCH -e logs/err-%j.txt
#SBATCH --mail-type=ALL
#SBATCH --mail-user=gaetan.caillaut@univ-lemans.fr

eval "$(conda shell.bash hook)"
conda activate polysemy

TRAIN="data/cleaned/t2/train.csv"
DEV="data/cleaned/t2/dev.csv"
TEST="data/cleaned/t2/test.csv"
TOKENIZER="output/tokenizer.json"
PRETRAINED_DIR="models/cleaned"
OUT_DIR="models/t2/cleaned"
BS=512
DEVICE="cuda"
LOGDIR="runs/t2/cleaned"

for d in ${OUT_DIR} ${LOGDIR}; do
    if [ ! -d ${d} ]; then
        mkdir -p ${d}
    fi
done

export PYTHONPATH="/lium/raid01_b/gcaillaut/polysemy/minibert:${PYTHONPATH}"

set -x
set -e

for E in $(seq -f "%05g" 0 10 40); do
    for D in 16 32 64 96 128; do
        for ATT in "self-attention" "non-transforming"; do
            for POS in "none" "fixed" "trained"; do
                MLM_RUN_NAME="d${D}_${ATT}_${POS}_gelu_norm"
                T2_RUN_NAME="d${D}_${ATT}_${POS}_norm_frozen"
                TB_DIR="${LOGDIR}/${T2_RUN_NAME}"

                if (( 10#$E > 0 )); then
                    CHECKPOINT="${OUT_DIR}/${T2_RUN_NAME}/checkpoint-${E}.tar"
                    python train.py t2 ${TRAIN} ${TEST} ${DEV} ${TOKENIZER} \
                        "${PRETRAINED_DIR}/${MLM_RUN_NAME}/minibert-model.pt" \
                        -o ${OUT_DIR} -d ${D} --attention ${ATT} --position ${POS} \
                        --epochs 10 --bs ${BS} --device ${DEVICE} --logdir ${TB_DIR} \
                        --checkpoint ${CHECKPOINT} --freeze-attention
                else
                    python train.py t2 ${TRAIN} ${TEST} ${DEV} ${TOKENIZER} \
                        "${PRETRAINED_DIR}/${MLM_RUN_NAME}/minibert-model.pt" \
                        -o ${OUT_DIR} -d ${D} --attention ${ATT} --position ${POS} \
                        --epochs 10 --bs ${BS} --device ${DEVICE} --logdir ${TB_DIR} \
                        --freeze-attention
                fi
            done
        done
    done
done
slurm_scripts/job_t2_lemmatized_frozen.sh (new file, mode 100755)
#!/bin/bash
#SBATCH -N 1
#SBATCH -p gpu
#SBATCH --gres gpu:rtx6000:1
#SBATCH --job-name t2-lemmatized-frozen
#SBATCH --time 10-0
#SBATCH --mem 20G
#SBATCH -o logs/out-%j.txt
#SBATCH -e logs/err-%j.txt
#SBATCH --mail-type=ALL
#SBATCH --mail-user=gaetan.caillaut@univ-lemans.fr

eval "$(conda shell.bash hook)"
conda activate polysemy

TRAIN="data/lemmatized/t2/train.csv"
DEV="data/lemmatized/t2/dev.csv"
TEST="data/lemmatized/t2/test.csv"
TOKENIZER="output/tokenizer_lemmatized.json"
PRETRAINED_DIR="models/lemmatized"
OUT_DIR="models/t2/lemmatized"
BS=512
DEVICE="cuda"
LOGDIR="runs/t2/lemmatized"

for d in ${OUT_DIR} ${LOGDIR}; do
    if [ ! -d ${d} ]; then
        mkdir -p ${d}
    fi
done

export PYTHONPATH="/lium/raid01_b/gcaillaut/polysemy/minibert:${PYTHONPATH}"

set -x
set -e

for E in $(seq -f "%05g" 0 10 40); do
    for D in 16 32 64 96 128; do
        for ATT in "self-attention" "non-transforming"; do
            for POS in "none" "fixed" "trained"; do
                MLM_RUN_NAME="d${D}_${ATT}_${POS}_gelu_norm"
                T2_RUN_NAME="d${D}_${ATT}_${POS}_norm_frozen"
                TB_DIR="${LOGDIR}/${T2_RUN_NAME}"

                if (( 10#$E > 0 )); then
                    CHECKPOINT="${OUT_DIR}/${T2_RUN_NAME}/checkpoint-${E}.tar"
                    python train.py t2 ${TRAIN} ${TEST} ${DEV} ${TOKENIZER} \
                        "${PRETRAINED_DIR}/${MLM_RUN_NAME}/minibert-model.pt" \
                        -o ${OUT_DIR} -d ${D} --attention ${ATT} --position ${POS} \
                        --epochs 10 --bs ${BS} --device ${DEVICE} --logdir ${TB_DIR} \
                        --checkpoint ${CHECKPOINT} --freeze-attention
                else
                    python train.py t2 ${TRAIN} ${TEST} ${DEV} ${TOKENIZER} \
                        "${PRETRAINED_DIR}/${MLM_RUN_NAME}/minibert-model.pt" \
                        -o ${OUT_DIR} -d ${D} --attention ${ATT} --position ${POS} \
                        --epochs 10 --bs ${BS} --device ${DEVICE} --logdir ${TB_DIR} \
                        --freeze-attention
                fi
            done
        done
    done
done
train.py
@@ -39,22 +39,30 @@ def run_name_from_params(args):
 def t1_run_name_from_params(args):
-    return "_".join([
+    s = "_".join([
         f"d{args.d}",
         args.attention.strip().lower(),
         args.position.strip().lower(),
         "nonorm" if args.dont_normalize else "norm"
     ])
+    if args.freeze_attention:
+        s = f"{s}_frozen"
+    return s
 
 
 def t2_run_name_from_params(args):
-    return "_".join([
+    s = "_".join([
         f"d{args.d}",
         args.attention.strip().lower(),
         args.position.strip().lower(),
         "nonorm" if args.dont_normalize else "norm"
     ])
+    if args.freeze_attention:
+        s = f"{s}_frozen"
+    return s
 
 
 def mlm_model_from_params(d, attention, position, tokenizer, max_seq_size, mask_token, pad_token, activation, device, checkpoint_path=None):
     if checkpoint_path is None:

@@ -369,6 +377,9 @@ def finetune_t1(args):
         args.model, args.d, attention_type, position_type, tokenizer, max_seq_size, mask_token, pad_token, device, checkpoint_path=args.checkpoint)
     run_name = t1_run_name_from_params(args)
 
+    if args.freeze_attention:
+        model.minibert.freeze()
+
     if args.logdir is None:
         writer = SummaryWriter(log_dir=f"runs/t1/{run_name}")
     else:

@@ -661,6 +672,9 @@ def finetune_t2(args):
         args.model, args.d, attention_type, position_type, tokenizer, max_seq_size, mask_token, pad_token, device, checkpoint_path=args.checkpoint)
     run_name = t2_run_name_from_params(args)
 
+    if args.freeze_attention:
+        model.minibert.freeze()
+
     if args.logdir is None:
         writer = SummaryWriter(log_dir=f"runs/t2/{run_name}")
     else:

@@ -1268,6 +1282,7 @@ if __name__ == "__main__":
     t1_parser.add_argument("--epochs-between-save", default=10, type=int)
     t1_parser.add_argument("--show-progress", default=50, type=int)
     t1_parser.add_argument("--sample", action="store_true")
+    t1_parser.add_argument("--freeze-attention", action="store_true")
     t1_parser.set_defaults(func=finetune_t1)
 
     t1fs_parser = subparsers.add_parser("t1-fs")

@@ -1309,6 +1324,7 @@ if __name__ == "__main__":
     t2_parser.add_argument("--attention", type=str, default="self-attention")
     t2_parser.add_argument("--position", type=str, default="fixed")
     t2_parser.add_argument("--dont-normalize", action="store_true")
+    t2_parser.add_argument("--freeze-attention", action="store_true")
     t2_parser.add_argument("--activation", type=str, default="gelu")
     t2_parser.add_argument("--device", type=str, default="cpu")
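train.py only calls model.minibert.freeze(); the body of freeze() lives in the separate minibert package and is not shown in this commit. The sketch below is a minimal, hypothetical illustration of what such a method typically does, assuming a standard PyTorch nn.Module and assuming (not confirmed by the diff) that freezing means disabling gradients on the pre-trained encoder so that only the task-specific layers are updated:

import torch.nn as nn

class MiniBert(nn.Module):
    # Hypothetical skeleton; the real minibert module is more elaborate.
    def __init__(self, embeddings: nn.Module, encoder: nn.Module):
        super().__init__()
        self.embeddings = embeddings
        self.encoder = encoder

    def freeze(self):
        # Disable gradient computation for every pre-trained parameter so the
        # optimizer only updates the parameters added for the fine-tuning task.
        for p in self.parameters():
            p.requires_grad = False

Under that assumption, the --freeze-attention runs launched by the job scripts above train only the layers stacked on top of the frozen encoder, which is what the _frozen suffix added by t1_run_name_from_params / t2_run_name_from_params records in the run and checkpoint names.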