Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Gaëtan Caillaut
hatespeech
Commits
6763d490
Commit
6763d490
authored
Jun 09, 2021
by
Gaëtan Caillaut
Browse files
max_length
parent
d74cda16
Changes
2
Hide whitespace changes
Inline
Side-by-side
data.py
View file @
6763d490
...
...
@@ -61,7 +61,7 @@ class HateSpeechCollater:
try
:
inputs
=
self
.
tokenizer
(
sentences
,
return_tensors
=
"pt"
,
padding
=
True
,
truncation
=
True
)
sentences
,
return_tensors
=
"pt"
,
padding
=
True
,
truncation
=
True
,
max_length
=
128
)
except
TypeError
:
pad_id
=
self
.
tokenizer
.
token_to_id
(
"<pad>"
)
self
.
tokenizer
.
enable_padding
(
pad_id
=
pad_id
)
...
...
train_roberta.py
View file @
6763d490
...
...
@@ -25,9 +25,9 @@ if __name__ == "__main__":
for
fold
,
(
train
,
test
)
in
enumerate
(
dataset
.
iter_folds
(
args
.
folds
,
True
),
1
):
train_loader
=
DataLoader
(
train
,
collate_fn
=
collater
,
shuffle
=
True
,
batch_size
=
64
,
pin_memory
=
pm
)
train
,
collate_fn
=
collater
,
shuffle
=
True
,
batch_size
=
128
,
pin_memory
=
pm
)
test_loader
=
DataLoader
(
test
,
collate_fn
=
collater
,
shuffle
=
False
,
batch_size
=
64
,
pin_memory
=
pm
)
test
,
collate_fn
=
collater
,
shuffle
=
False
,
batch_size
=
128
,
pin_memory
=
pm
)
if
args
.
jobname
is
not
None
:
writer
=
SummaryWriter
(
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment