Commit b4fc174c authored by Gaëtan Caillaut
to_tensor respects max_seq_size

parent 0a3f23b1
@@ -72,8 +72,10 @@ class TrainData:
(len(encoded), self.max_seq_size), -1, dtype=torch.long)
for i, encoded in enumerate(self.tokenizer.encode_batch(sentences)):
sequence_tensor[i, :] = torch.tensor(encoded.ids)
attention_mask_tensor[i, :] = torch.tensor(encoded.attention_mask)
sequence_tensor[i, :] = torch.tensor(
attention_mask_tensor[i, :] = torch.tensor(
for j, wid in enumerate(encoded.word_ids):
if wid is None:
