Gaëtan Caillaut / minibert-deft2018 / Commits

Commit cb65454a, authored Apr 02, 2021 by Gaëtan Caillaut

camembert v2

Parent: 164924ca
Changes: 5 files
slurm_scripts/job_t1_camembert_lemmatized_v2.sh (new file, mode 100755)
#!/bin/bash
#SBATCH -N 1
#SBATCH -p gpu
#SBATCH --gres gpu:rtx6000:1
#SBATCH --job-name camembert-t1-lemmatized-v2
#SBATCH --time 10-0
#SBATCH --mem 20G
#SBATCH -o logs/out-%j.txt
#SBATCH -e logs/err-%j.txt
#SBATCH --mail-type=ALL
#SBATCH --mail-user=gaetan.caillaut@univ-lemans.fr
eval "$(conda shell.bash hook)"
conda activate polysemy

TRAIN="data/lemmatized/t1/train.csv"
DEV="data/lemmatized/t1/dev.csv"
TEST="data/lemmatized/t1/test.csv"
TOKENIZER="output/tokenizer.json"
PRETRAINED_DIR="models/lemmatized"
OUT_DIR="models/t1/lemmatized/camembert-v2"
BS=200
DEVICE="cuda"
LOGDIR="runs/t1/lemmatized/camembert-v2"

for d in ${OUT_DIR} ${LOGDIR}; do
    if [ ! -d ${d} ]; then
        mkdir -p ${d}
    fi
done

export PYTHONPATH="/lium/raid01_b/gcaillaut/polysemy/minibert:${PYTHONPATH}"

set -x
set -e

python train.py camembert-t1-v2 ${TRAIN} ${TEST} ${DEV} --outdir ${OUT_DIR} --bs ${BS} -e 10 --epochs-between-save 1 --logdir ${LOGDIR} --device ${DEVICE}
slurm_scripts/job_t1_camembert_v2.sh (new file, mode 100755)
#!/bin/bash
#SBATCH -N 1
#SBATCH -p gpu
#SBATCH --gres gpu:rtx6000:1
#SBATCH --job-name camembert-t1-v2
#SBATCH --time 10-0
#SBATCH --mem 20G
#SBATCH -o logs/out-%j.txt
#SBATCH -e logs/err-%j.txt
#SBATCH --mail-type=ALL
#SBATCH --mail-user=gaetan.caillaut@univ-lemans.fr
eval "$(conda shell.bash hook)"
conda activate polysemy

TRAIN="data/cleaned/t1/train.csv"
DEV="data/cleaned/t1/dev.csv"
TEST="data/cleaned/t1/test.csv"
TOKENIZER="output/tokenizer.json"
PRETRAINED_DIR="models/cleaned"
OUT_DIR="models/t1/cleaned/camembert-v2"
BS=200
DEVICE="cuda"
LOGDIR="runs/t1/cleaned/camembert-v2"

for d in ${OUT_DIR} ${LOGDIR}; do
    if [ ! -d ${d} ]; then
        mkdir -p ${d}
    fi
done

export PYTHONPATH="/lium/raid01_b/gcaillaut/polysemy/minibert:${PYTHONPATH}"

set -x
set -e

python train.py camembert-t1-v2 ${TRAIN} ${TEST} ${DEV} --outdir ${OUT_DIR} --bs ${BS} -e 10 --epochs-between-save 1 --logdir ${LOGDIR} --device ${DEVICE}
slurm_scripts/job_t2_camembert_lemmatized_v2.sh (new file, mode 100755)
#!/bin/bash
#SBATCH -N 1
#SBATCH -p gpu
#SBATCH --gres gpu:rtx6000:1
#SBATCH --job-name camembert-t2-lemmatized-v2
#SBATCH --time 10-0
#SBATCH --mem 20G
#SBATCH -o logs/out-%j.txt
#SBATCH -e logs/err-%j.txt
#SBATCH --mail-type=ALL
#SBATCH --mail-user=gaetan.caillaut@univ-lemans.fr
eval "$(conda shell.bash hook)"
conda activate polysemy

TRAIN="data/lemmatized/t2/train.csv"
DEV="data/lemmatized/t2/dev.csv"
TEST="data/lemmatized/t2/test.csv"
TOKENIZER="output/tokenizer.json"
PRETRAINED_DIR="models/lemmatized"
OUT_DIR="models/t2/lemmatized/camembert-v2"
BS=200
DEVICE="cuda"
LOGDIR="runs/t2/lemmatized/camembert-v2"

for d in ${OUT_DIR} ${LOGDIR}; do
    if [ ! -d ${d} ]; then
        mkdir -p ${d}
    fi
done

export PYTHONPATH="/lium/raid01_b/gcaillaut/polysemy/minibert:${PYTHONPATH}"

set -x
set -e

python train.py camembert-t2-v2 ${TRAIN} ${TEST} ${DEV} --outdir ${OUT_DIR} --bs ${BS} -e 10 --epochs-between-save 1 --logdir ${LOGDIR} --device ${DEVICE}
slurm_scripts/job_t2_camembert_v2.sh (new file, mode 100755)
#!/bin/bash
#SBATCH -N 1
#SBATCH -p gpu
#SBATCH --gres gpu:rtx6000:1
#SBATCH --job-name camembert-t2
#SBATCH --time 10-0
#SBATCH --mem 20G
#SBATCH -o logs/out-%j.txt
#SBATCH -e logs/err-%j.txt
#SBATCH --mail-type=ALL
#SBATCH --mail-user=gaetan.caillaut@univ-lemans.fr
eval "$(conda shell.bash hook)"
conda activate polysemy

TRAIN="data/cleaned/t2/train.csv"
DEV="data/cleaned/t2/dev.csv"
TEST="data/cleaned/t2/test.csv"
TOKENIZER="output/tokenizer.json"
PRETRAINED_DIR="models/cleaned"
OUT_DIR="models/t2/cleaned/camembert-v2"
BS=200
DEVICE="cuda"
LOGDIR="runs/t2/cleaned/camembert-v2"

for d in ${OUT_DIR} ${LOGDIR}; do
    if [ ! -d ${d} ]; then
        mkdir -p ${d}
    fi
done

export PYTHONPATH="/lium/raid01_b/gcaillaut/polysemy/minibert:${PYTHONPATH}"

set -x
set -e

python train.py camembert-t2-v2 ${TRAIN} ${TEST} ${DEV} --outdir ${OUT_DIR} --bs ${BS} -e 10 --epochs-between-save 1 --logdir ${LOGDIR} --device ${DEVICE}
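
All four job files are ordinary SLURM batch scripts and would normally be submitted with sbatch. A hypothetical helper that submits each of them from Python, assuming the sbatch binary is on PATH and the working directory is the repository root (illustrative only, not part of this commit):

import subprocess

jobs = [
    "slurm_scripts/job_t1_camembert_lemmatized_v2.sh",
    "slurm_scripts/job_t1_camembert_v2.sh",
    "slurm_scripts/job_t2_camembert_lemmatized_v2.sh",
    "slurm_scripts/job_t2_camembert_v2.sh",
]
for job in jobs:
    # capture the "Submitted batch job <id>" line that sbatch prints
    result = subprocess.run(["sbatch", job], check=True, capture_output=True, text=True)
    print(result.stdout.strip())
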
train.py
@@ -2,15 +2,48 @@ from pathlib import Path
from datetime import datetime, timedelta
from torch.utils.tensorboard import SummaryWriter
from tokenizers import Tokenizer
-from transformers import CamembertForSequenceClassification, CamembertTokenizerFast
+from transformers import CamembertForSequenceClassification, CamembertTokenizerFast, CamembertModel
from datasets import *
from evaluation import *
import argparse
import sys
import torch
from minibert import *

class MyCamembertForSequenceClassification(torch.nn.Module):
    def __init__(self, num_labels):
        super(MyCamembertForSequenceClassification, self).__init__()
        self.camembert = CamembertModel.from_pretrained("camembert-base")
        # Hidden sizes must be ints: 768 / 2 would pass a float to nn.Linear
        # and raise a TypeError, so use integer division.
        self.l1 = nn.Linear(768, 768 // 2, bias=True)
        self.l1_activation_fun = parse_activation_function("gelu")
        self.l2 = nn.Linear(768 // 2, num_labels, bias=True)
        self.l2_activation_fun = parse_activation_function("none")

    def forward(self, input, attention_mask=None):
        # CamembertModel returns a model output; index [0] keeps the last
        # hidden state of shape (batch, seq_len, 768) before pooling.
        x = self.camembert(input_ids=input, attention_mask=attention_mask)[0]
        # Average tokens for sentence classification
        if attention_mask is None:
            x = torch.mean(x, dim=1)
        else:
            averaged = torch.zeros((x.size(0), 768), dtype=torch.float, device=x.device)
            for i in range(x.size(0)):
                # Only average over real (non-padding) tokens.
                token_embs = x[i, attention_mask[i, :] > 0, :]
                averaged[i, :] = torch.mean(token_embs, dim=0)
            x = averaged
        x = self.l1(x)
        x = self.l1_activation_fun(x)
        x = self.l2(x)
        x = self.l2_activation_fun(x, dim=1)
        return x
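
The else branch above averages the non-padded token embeddings one example at a time in a Python loop. The same pooling can be expressed as a single batched operation; a minimal sketch of a vectorized equivalent (hypothetical helper, not part of the commit):

import torch

def masked_mean(hidden, attention_mask):
    # hidden: (batch, seq_len, dim); attention_mask: (batch, seq_len), 1 for real tokens.
    mask = attention_mask.unsqueeze(-1).float()   # (batch, seq_len, 1)
    summed = (hidden * mask).sum(dim=1)           # padding rows contribute zeros
    counts = mask.sum(dim=1).clamp(min=1.0)       # guard against all-padding rows
    return summed / counts                        # (batch, dim), same result as the loop
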
def parse_position(s):
    position_mapper = {
        "none": PositionalEmbeddingType.NONE,
        ...
@@ -1098,6 +1131,153 @@ def finetune_t1_camembert(args):
    torch.save(optimizer.state_dict(), str(optimizer_out))

def finetune_t1_camembert_v2(args):
    device = args.device
    pin_memory = device != "cpu"

    camembert_tokenizer = CamembertTokenizerFast.from_pretrained("camembert/camembert-base")

    if args.checkpoint is None:
        model = MyCamembertForSequenceClassification(num_labels=2)
        model = model.to(device)
        optimizer = torch.optim.Adam(model.parameters())
        prev_epoch = 0
    else:
        checkpoint = torch.load(args.checkpoint)
        # configuration = checkpoint["configuration"]
        device = checkpoint["device"]
        model = MyCamembertForSequenceClassification(num_labels=2)
        model.load_state_dict(checkpoint["model_state_dict"])
        model = model.to(device)  # ensure restored weights live on the training device
        optimizer = torch.optim.Adam(model.parameters())
        optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
        prev_epoch = checkpoint["epoch"]
    model.train()

    deft_collater = DEFT2018CollaterForCamembert(camembert_tokenizer, T1Dataset.labels_to_id())

    train_dataset = T1Dataset.from_csv(args.train)
    dev_dataset = T1Dataset.from_csv(args.dev)
    test_dataset = T1Dataset.from_csv(args.test)

    if args.sample:
        train_dataset = train_dataset[:50]
        test_dataset = test_dataset[:50]

    train_loader = DataLoader(train_dataset, collate_fn=deft_collater, batch_size=args.bs, pin_memory=pin_memory)
    dev_loader = DataLoader(dev_dataset, collate_fn=deft_collater, batch_size=args.bs, pin_memory=pin_memory)
    test_loader = DataLoader(test_dataset, collate_fn=deft_collater, batch_size=args.bs, pin_memory=pin_memory)

    outdir = Path(args.outdir)
    if args.checkpoint is None:
        outdir.mkdir(exist_ok=True)

    writer = SummaryWriter(log_dir=args.logdir)
    loss_fun = torch.nn.CrossEntropyLoss()

    print("BEGIN TRAINING", flush=True)
    for epoch in range(prev_epoch + 1, prev_epoch + 1 + args.epochs):
        model.train()
        cumloss = 0
        t0_epoch = datetime.now()
        batch_cumulated_time = timedelta()
        for batch_id, (x, attention_mask, labels) in enumerate(train_loader, 1):
            t0_batch = datetime.now()
            x = x.to(device)
            attention_mask = attention_mask.to(device)
            # wids = wids.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            output = model(x, attention_mask=attention_mask)
            loss = loss_fun(output, labels)
            loss.backward()
            optimizer.step()
            cumloss += loss.item()

            t1_batch = datetime.now()
            batch_time = t1_batch - t0_batch
            batch_cumulated_time += batch_time
            if batch_id % args.show_progress == 0:
                print(f"EPOCH {epoch} - BATCH {batch_id:05} - LOSS {loss.item()} - TIME {batch_cumulated_time}", flush=True)
                batch_cumulated_time = timedelta()

        mean_loss = cumloss / len(train_loader)
        writer.add_scalar("Loss/train", mean_loss, epoch)
        t1_epoch = datetime.now()
        print(f"EPOCH {epoch:04} - MEAN LOSS {mean_loss} - TIME {t1_epoch - t0_epoch}", flush=True)

        if epoch % args.epochs_between_save == 0:
            model.eval()
            tp_dev, fp_dev, fn_dev, recall_dev, precision_dev, fmeasure_dev = fmeasure_deft2018_t1(model, dev_loader, device)
            tp_test, fp_test, fn_test, recall_test, precision_test, fmeasure_test = fmeasure_deft2018_t1(model, test_loader, device)

            writer.add_scalar("dev/true positives", tp_dev, epoch)
            writer.add_scalar("dev/false positives", fp_dev, epoch)
            writer.add_scalar("dev/false negatives", fn_dev, epoch)
            writer.add_scalar("dev/recall", recall_dev, epoch)
            writer.add_scalar("dev/precision", precision_dev, epoch)
            writer.add_scalar("dev/fmeasure", fmeasure_dev, epoch)
            writer.add_scalar("test/true positives", tp_test, epoch)
            writer.add_scalar("test/false positives", fp_test, epoch)
            writer.add_scalar("test/false negatives", fn_test, epoch)
            writer.add_scalar("test/recall", recall_test, epoch)
            writer.add_scalar("test/precision", precision_test, epoch)
            writer.add_scalar("test/fmeasure", fmeasure_test, epoch)

            checkpoint = {
                "epoch": epoch,
                "model_state_dict": model.state_dict(),
                "optimizer_state_dict": optimizer.state_dict(),
                "device": device,
                # "configuration": model.config,
                "perf": {
                    "dev": {
                        "recall": recall_dev,
                        "precision": precision_dev,
                        "fmeasure": fmeasure_dev,
                        "tp": tp_dev,
                        "fp": fp_dev,
                        "fn": fn_dev
                    },
                    "test": {
                        "recall": recall_test,
                        "precision": precision_test,
                        "fmeasure": fmeasure_test,
                        "tp": tp_test,
                        "fp": fp_test,
                        "fn": fn_test
                    },
                }
            }
            outfile = Path(outdir, f"checkpoint-{epoch:05}.tar")
            torch.save(checkpoint, str(outfile))

    writer.flush()
    writer.close()

    model_out = Path(outdir, "t1-model.pt")
    optimizer_out = Path(outdir, "t1-optimizer.pt")
    torch.save(model.state_dict(), str(model_out))
    torch.save(optimizer.state_dict(), str(optimizer_out))
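
Once training ends, the final weights land in t1-model.pt under the output directory. A hedged sketch of reloading them for inference, reusing MyCamembertForSequenceClassification from above; the path matches the lemmatized T1 script and the sample sentence is illustrative only:

import torch
from transformers import CamembertTokenizerFast

tokenizer = CamembertTokenizerFast.from_pretrained("camembert-base")
model = MyCamembertForSequenceClassification(num_labels=2)
state = torch.load("models/t1/lemmatized/camembert-v2/t1-model.pt", map_location="cpu")
model.load_state_dict(state)
model.eval()

enc = tokenizer(["Une phrase d'exemple."], return_tensors="pt", padding=True)
with torch.no_grad():
    logits = model(enc["input_ids"], attention_mask=enc["attention_mask"])
print(logits.argmax(dim=1))  # predicted T1 label id
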
def finetune_t2_camembert(args):
    device = args.device
    pin_memory = device != "cpu"
    ...
@@ -1248,6 +1428,154 @@ def finetune_t2_camembert(args):
    torch.save(optimizer.state_dict(), str(optimizer_out))

def finetune_t2_camembert_v2(args):
    device = args.device
    pin_memory = device != "cpu"

    camembert_tokenizer = CamembertTokenizerFast.from_pretrained("camembert/camembert-base")

    if args.checkpoint is None:
        model = MyCamembertForSequenceClassification(num_labels=4)
        model = model.to(device)
        optimizer = torch.optim.Adam(model.parameters())
        prev_epoch = 0
    else:
        checkpoint = torch.load(args.checkpoint)
        # configuration = checkpoint["configuration"]
        device = checkpoint["device"]
        model = MyCamembertForSequenceClassification(num_labels=4)
        model.load_state_dict(checkpoint["model_state_dict"])
        model = model.to(device)  # ensure restored weights live on the training device
        optimizer = torch.optim.Adam(model.parameters())
        optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
        prev_epoch = checkpoint["epoch"]
    model.train()

    deft_collater = DEFT2018CollaterForCamembert(camembert_tokenizer, T2Dataset.labels_to_id())
    classes = set(T2Dataset.labels_to_id().values())

    train_dataset = T2Dataset.from_csv(args.train)
    dev_dataset = T2Dataset.from_csv(args.dev)
    test_dataset = T2Dataset.from_csv(args.test)

    if args.sample:
        train_dataset = train_dataset[:10]
        test_dataset = test_dataset[:10]

    train_loader = DataLoader(train_dataset, collate_fn=deft_collater, batch_size=args.bs, pin_memory=pin_memory)
    dev_loader = DataLoader(dev_dataset, collate_fn=deft_collater, batch_size=args.bs, pin_memory=pin_memory)
    test_loader = DataLoader(test_dataset, collate_fn=deft_collater, batch_size=args.bs, pin_memory=pin_memory)

    outdir = Path(args.outdir)
    if args.checkpoint is None:
        outdir.mkdir(exist_ok=True)

    writer = SummaryWriter(log_dir=args.logdir)
    loss_fun = torch.nn.CrossEntropyLoss()

    print("BEGIN TRAINING", flush=True)
    for epoch in range(prev_epoch + 1, prev_epoch + 1 + args.epochs):
        model.train()
        cumloss = 0
        t0_epoch = datetime.now()
        batch_cumulated_time = timedelta()
        for batch_id, (x, attention_mask, labels) in enumerate(train_loader, 1):
            t0_batch = datetime.now()
            x = x.to(device)
            attention_mask = attention_mask.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            output = model(x, attention_mask)
            loss = loss_fun(output, labels)
            loss.backward()
            optimizer.step()
            cumloss += loss.item()

            t1_batch = datetime.now()
            batch_time = t1_batch - t0_batch
            batch_cumulated_time += batch_time
            if batch_id % args.show_progress == 0:
                print(f"EPOCH {epoch} - BATCH {batch_id:05} - LOSS {loss.item()} - TIME {batch_cumulated_time}", flush=True)
                batch_cumulated_time = timedelta()

        mean_loss = cumloss / len(train_loader)
        writer.add_scalar("Loss/train", mean_loss, epoch)
        t1_epoch = datetime.now()
        print(f"EPOCH {epoch:04} - MEAN LOSS {mean_loss} - TIME {t1_epoch - t0_epoch}", flush=True)

        if epoch % args.epochs_between_save == 0:
            model.eval()
            tp_dev, fp_dev, fn_dev, recall_dev, precision_dev, fmeasure_dev = fmeasure_deft2018_t2(model, dev_loader, classes, device)
            tp_test, fp_test, fn_test, recall_test, precision_test, fmeasure_test = fmeasure_deft2018_t2(model, test_loader, classes, device)

            writer.add_scalar("dev/true positives", tp_dev, epoch)
            writer.add_scalar("dev/false positives", fp_dev, epoch)
            writer.add_scalar("dev/false negatives", fn_dev, epoch)
            writer.add_scalar("dev/recall", recall_dev, epoch)
            writer.add_scalar("dev/precision", precision_dev, epoch)
            writer.add_scalar("dev/fmeasure", fmeasure_dev, epoch)
            writer.add_scalar("test/true positives", tp_test, epoch)
            writer.add_scalar("test/false positives", fp_test, epoch)
            writer.add_scalar("test/false negatives", fn_test, epoch)
            writer.add_scalar("test/recall", recall_test, epoch)
            writer.add_scalar("test/precision", precision_test, epoch)
            writer.add_scalar("test/fmeasure", fmeasure_test, epoch)

            checkpoint = {
                "epoch": epoch,
                "model_state_dict": model.state_dict(),
                "optimizer_state_dict": optimizer.state_dict(),
                "device": device,
                # "configuration": model.config,
                "perf": {
                    "dev": {
                        "recall": recall_dev,
                        "precision": precision_dev,
                        "fmeasure": fmeasure_dev,
                        "tp": tp_dev,
                        "fp": fp_dev,
                        "fn": fn_dev
                    },
                    "test": {
                        "recall": recall_test,
                        "precision": precision_test,
                        "fmeasure": fmeasure_test,
                        "tp": tp_test,
                        "fp": fp_test,
                        "fn": fn_test
                    },
                }
            }
            outfile = Path(outdir, f"checkpoint-{epoch:05}.tar")
            torch.save(checkpoint, str(outfile))

    writer.flush()
    writer.close()

    model_out = Path(outdir, "t2-model.pt")
    optimizer_out = Path(outdir, "t2-optimizer.pt")
    torch.save(model.state_dict(), str(model_out))
    torch.save(optimizer.state_dict(), str(optimizer_out))
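
fmeasure_deft2018_t1 and fmeasure_deft2018_t2 come from evaluation.py, which this commit does not touch. For reference, a minimal sketch of how precision, recall and F-measure follow from the tp/fp/fn counts that get checkpointed above (the repo's implementation may differ):

def prf_from_counts(tp, fp, fn):
    # Standard definitions, with denominators guarded against zero.
    precision = tp / (tp + fp) if tp + fp else 0.0
    recall = tp / (tp + fn) if tp + fn else 0.0
    fmeasure = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
    return precision, recall, fmeasure

print(prf_from_counts(tp=40, fp=10, fn=10))  # (0.8, 0.8, 0.8)
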
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    ...
@@ -1425,7 +1753,37 @@ if __name__ == "__main__":
    cam_t2_parser.add_argument("--epochs-between-save", default=10, type=int)
    cam_t2_parser.add_argument("--show-progress", default=50, type=int)
    cam_t2_parser.add_argument("--sample", action="store_true")
-   cam_t2_parser.set_defaults(func=finetune_t2_camembert)
+   cam_t2_parser.set_defaults(func=finetune_t2_camembert_v2)

    cam_t1_parser = subparsers.add_parser("camembert-t1-v2")
    cam_t1_parser.add_argument("train")
    cam_t1_parser.add_argument("test")
    cam_t1_parser.add_argument("dev")
    cam_t1_parser.add_argument("-o", "--outdir", type=str)
    cam_t1_parser.add_argument("--bs", type=int, default=128)
    cam_t1_parser.add_argument("-e", "--epochs", type=int, default=100)
    cam_t1_parser.add_argument("--device", type=str, default="cpu")
    cam_t1_parser.add_argument("--logdir", type=str, required=True)
    cam_t1_parser.add_argument("-c", "--checkpoint", type=str, required=False)
    cam_t1_parser.add_argument("--epochs-between-save", default=10, type=int)
    cam_t1_parser.add_argument("--show-progress", default=50, type=int)
    cam_t1_parser.add_argument("--sample", action="store_true")
    cam_t1_parser.set_defaults(func=finetune_t1_camembert)

    cam_t2_parser = subparsers.add_parser("camembert-t2-v2")
    cam_t2_parser.add_argument("train")
    cam_t2_parser.add_argument("test")
    cam_t2_parser.add_argument("dev")
    cam_t2_parser.add_argument("-o", "--outdir", type=str)
    cam_t2_parser.add_argument("--bs", type=int, default=128)
    cam_t2_parser.add_argument("-e", "--epochs", type=int, default=100)
    cam_t2_parser.add_argument("--device", type=str, default="cpu")
    cam_t2_parser.add_argument("--logdir", type=str, required=True)
    cam_t2_parser.add_argument("-c", "--checkpoint", type=str, required=False)
    cam_t2_parser.add_argument("--epochs-between-save", default=10, type=int)
    cam_t2_parser.add_argument("--show-progress", default=50, type=int)
    cam_t2_parser.add_argument("--sample", action="store_true")
    cam_t2_parser.set_defaults(func=finetune_t2_camembert_v2)

    args = parser.parse_args()
    args.func(args)
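
Each subcommand is wired to its training routine through set_defaults(func=...): parse_args() returns a namespace carrying that function, and args.func(args) dispatches to it. A stripped-down illustration of the pattern (names hypothetical):

import argparse

def greet(args):
    print(f"hello, {args.name}")

parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers()
greet_parser = subparsers.add_parser("greet")
greet_parser.add_argument("name")
greet_parser.set_defaults(func=greet)  # same dispatch pattern as train.py

args = parser.parse_args(["greet", "DEFT"])
args.func(args)  # prints "hello, DEFT"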