Commit 215506ec authored by Gaëtan Caillaut
Browse files

attention-scaling & minmax & taylor

parent 81474018
#!/bin/bash
#SBATCH -N 1
#SBATCH -p gpu
#SBATCH --gres gpu:rtx6000:1
#SBATCH --job-name mlm-minmax
#SBATCH --time 10-0
#SBATCH --mem 20G
#SBATCH -o logs/out-%j.txt
#SBATCH -e logs/err-%j.txt
#SBATCH --mail-type=ALL
#SBATCH --mail-user=gaetan.caillaut@univ-lemans.fr
#
# Slurm batch job: runs `train.py mlm` on the data/cleaned/t1 CSV files with
# `--attention-scaling minmax`, once for every combination of -d value,
# attention type and position type, over five consecutive passes of
# `--epochs 10`.

# Load conda's shell functions into this non-interactive shell, then activate
# the environment. Both run before `set -e` is enabled further down.
eval "$(conda shell.bash hook)"
conda activate polysemy

# Input files.
TRAIN="data/cleaned/t1/train.csv"
DEV="data/cleaned/t1/dev.csv"
TEST="data/cleaned/t1/test.csv"
TOKENIZER="output/tokenizer.json"

# Output locations and run settings.
OUT_DIR="models/cleaned"
BS=256
DEVICE="cuda"
LOGDIR="runs/cleaned"

# `mkdir -p` leaves already-existing directories untouched.
mkdir -p "${OUT_DIR}" "${LOGDIR}"

export PYTHONPATH="/lium/raid01_b/gcaillaut/polysemy/minibert:${PYTHONPATH}"

set -x
set -e

# Pass tags: the zero-padded values 00000, 00010, 00020, 00030, 00040.
for epoch_tag in 00000 00010 00020 00030 00040; do
  for width in 16 32 64; do
    for att in "self-attention" "non-transforming" "semi-transforming"; do
      for pos in "none" "fixed" "trained"; do
        run_name="d${width}_${att}_${pos}_gelu_norm_minmax"
        tb_dir="${LOGDIR}/${run_name}"

        # NOTE(review): positional order is train, test, dev -- confirm it
        # matches what train.py expects.
        cmd=(
          python train.py mlm "${TRAIN}" "${TEST}" "${DEV}" "${TOKENIZER}"
          -o "${OUT_DIR}" -d "${width}" --bs "${BS}" --epochs 10
          --attention "${att}" --position "${pos}" --device "${DEVICE}"
        )
        # Every pass after the first also passes the checkpoint file named
        # after the current tag (presumably to resume training -- confirm).
        if [[ "${epoch_tag}" != "00000" ]]; then
          cmd+=(--checkpoint "${OUT_DIR}/${run_name}/checkpoint-${epoch_tag}.tar")
        fi
        cmd+=(--logdir "${tb_dir}" --height 1 --depth 1 --attention-scaling minmax)

        "${cmd[@]}"
      done
    done
  done
done
\ No newline at end of file
#!/bin/bash
#SBATCH -N 1
#SBATCH -p gpu
#SBATCH --gres gpu:rtx6000:1
#SBATCH --job-name mlm-lemmatized-minmax
#SBATCH --time 10-0
#SBATCH --mem 20G
#SBATCH -o logs/out-%j.txt
#SBATCH -e logs/err-%j.txt
#SBATCH --mail-type=ALL
#SBATCH --mail-user=gaetan.caillaut@univ-lemans.fr
#
# Slurm batch job: runs `train.py mlm` on the data/lemmatized/t1 CSV files
# with `--attention-scaling minmax`, once for every combination of -d value,
# attention type and position type, over five consecutive passes of
# `--epochs 10`.

# Load conda's shell functions into this non-interactive shell, then activate
# the environment. Both run before `set -e` is enabled further down.
eval "$(conda shell.bash hook)"
conda activate polysemy

# Input files.
TRAIN="data/lemmatized/t1/train.csv"
DEV="data/lemmatized/t1/dev.csv"
TEST="data/lemmatized/t1/test.csv"
TOKENIZER="output/tokenizer_lemmatized.json"

# Output locations and run settings.
OUT_DIR="models/lemmatized"
BS=256
DEVICE="cuda"
LOGDIR="runs/lemmatized"

# `mkdir -p` leaves already-existing directories untouched.
mkdir -p "${OUT_DIR}" "${LOGDIR}"

export PYTHONPATH="/lium/raid01_b/gcaillaut/polysemy/minibert:${PYTHONPATH}"

set -x
set -e

# Pass tags: the zero-padded values 00000, 00010, 00020, 00030, 00040.
for epoch_tag in 00000 00010 00020 00030 00040; do
  for width in 16 32 64; do
    for att in "self-attention" "non-transforming" "semi-transforming"; do
      for pos in "none" "fixed" "trained"; do
        run_name="d${width}_${att}_${pos}_gelu_norm_minmax"
        tb_dir="${LOGDIR}/${run_name}"

        # NOTE(review): positional order is train, test, dev -- confirm it
        # matches what train.py expects.
        cmd=(
          python train.py mlm "${TRAIN}" "${TEST}" "${DEV}" "${TOKENIZER}"
          -o "${OUT_DIR}" -d "${width}" --bs "${BS}" --epochs 10
          --attention "${att}" --position "${pos}" --device "${DEVICE}"
        )
        # Every pass after the first also passes the checkpoint file named
        # after the current tag (presumably to resume training -- confirm).
        if [[ "${epoch_tag}" != "00000" ]]; then
          cmd+=(--checkpoint "${OUT_DIR}/${run_name}/checkpoint-${epoch_tag}.tar")
        fi
        cmd+=(--logdir "${tb_dir}" --height 1 --depth 1 --attention-scaling minmax)

        "${cmd[@]}"
      done
    done
  done
done
\ No newline at end of file
#!/bin/bash
#SBATCH -N 1
#SBATCH -p gpu
#SBATCH --gres gpu:rtx6000:1
#SBATCH --job-name t1_fs-minmax
#SBATCH --time 10-0
#SBATCH --mem 20G
#SBATCH -o logs/out-%j.txt
#SBATCH -e logs/err-%j.txt
#SBATCH --mail-type=ALL
#SBATCH --mail-user=gaetan.caillaut@univ-lemans.fr
#
# Slurm batch job: runs `train.py t1-fs` on the data/cleaned/t1 CSV files with
# `--attention-scaling minmax`, once for every combination of -d value,
# attention type and position type, over five consecutive passes of
# `--epochs 10`.

# Load conda's shell functions into this non-interactive shell, then activate
# the environment. Kept ahead of `set -e`, matching the original ordering.
eval "$(conda shell.bash hook)"
conda activate polysemy

# Input files.
TRAIN="data/cleaned/t1/train.csv"
DEV="data/cleaned/t1/dev.csv"
TEST="data/cleaned/t1/test.csv"
TOKENIZER="output/tokenizer.json"

# Output locations and run settings.
OUT_DIR="models/t1_fs/cleaned"
BS=256
DEVICE="cuda"
LOGDIR="runs/t1_fs/cleaned"

# `mkdir -p` is a no-op for directories that already exist.
mkdir -p "${OUT_DIR}" "${LOGDIR}"

# Prepend the minibert checkout; the separator is only added when PYTHONPATH
# already has a value, so no empty path entry is left behind.
export PYTHONPATH="/lium/raid01_b/gcaillaut/polysemy/minibert${PYTHONPATH:+:${PYTHONPATH}}"

set -x
set -e

# E takes the zero-padded values 00000, 00010, ..., 00040.
for E in $(seq -f "%05g" 0 10 40); do
  for D in 16 32 64; do
    for ATT in "self-attention" "non-transforming" "semi-transforming"; do
      for POS in "none" "fixed" "trained"; do
        T1_RUN_NAME="d${D}_${ATT}_${POS}_norm_minmax"
        TB_DIR="${LOGDIR}/${T1_RUN_NAME}"

        # Arguments common to every pass.
        # NOTE(review): positional order is train, test, dev -- confirm it
        # matches what train.py expects.
        args=(
          t1-fs "${TRAIN}" "${TEST}" "${DEV}" "${TOKENIZER}"
          -o "${OUT_DIR}" -d "${D}"
          --attention "${ATT}" --position "${POS}"
          --epochs 10 --bs "${BS}" --device "${DEVICE}"
          --logdir "${TB_DIR}"
        )
        # 10# forces base 10; the leading zeros would otherwise be parsed as
        # octal. Passes after the first also pass the matching checkpoint file.
        if (( 10#${E} > 0 )); then
          args+=(--checkpoint "${OUT_DIR}/${T1_RUN_NAME}/checkpoint-${E}.tar")
        fi
        args+=(--height 1 --depth 1 --attention-scaling minmax)

        python train.py "${args[@]}"
      done
    done
  done
done
\ No newline at end of file
#!/bin/bash
#SBATCH -N 1
#SBATCH -p gpu
#SBATCH --gres gpu:rtx6000:1
#SBATCH --job-name t1_fs-lemmatized-minmax
#SBATCH --time 10-0
#SBATCH --mem 20G
#SBATCH -o logs/out-%j.txt
#SBATCH -e logs/err-%j.txt
#SBATCH --mail-type=ALL
#SBATCH --mail-user=gaetan.caillaut@univ-lemans.fr
#
# Slurm batch job: runs `train.py t1-fs` on the data/lemmatized/t1 CSV files
# with `--attention-scaling minmax`, once for every combination of -d value,
# attention type and position type, over five consecutive passes of
# `--epochs 10`.

# Load conda's shell functions into this non-interactive shell, then activate
# the environment. Kept ahead of `set -e`, matching the original ordering.
eval "$(conda shell.bash hook)"
conda activate polysemy

# Input files.
TRAIN="data/lemmatized/t1/train.csv"
DEV="data/lemmatized/t1/dev.csv"
TEST="data/lemmatized/t1/test.csv"
# NOTE(review): the lemmatized MLM job scripts use
# output/tokenizer_lemmatized.json; confirm that tokenizer.json is really the
# intended tokenizer for lemmatized data here.
TOKENIZER="output/tokenizer.json"

# Output locations and run settings.
OUT_DIR="models/t1_fs/lemmatized"
BS=256
DEVICE="cuda"
LOGDIR="runs/t1_fs/lemmatized"

# `mkdir -p` is a no-op for directories that already exist.
mkdir -p "${OUT_DIR}" "${LOGDIR}"

# Prepend the minibert checkout; the separator is only added when PYTHONPATH
# already has a value, so no empty path entry is left behind.
export PYTHONPATH="/lium/raid01_b/gcaillaut/polysemy/minibert${PYTHONPATH:+:${PYTHONPATH}}"

set -x
set -e

# E takes the zero-padded values 00000, 00010, ..., 00040.
for E in $(seq -f "%05g" 0 10 40); do
  for D in 16 32 64; do
    for ATT in "self-attention" "non-transforming" "semi-transforming"; do
      for POS in "none" "fixed" "trained"; do
        T1_RUN_NAME="d${D}_${ATT}_${POS}_norm_minmax"
        TB_DIR="${LOGDIR}/${T1_RUN_NAME}"

        # Arguments common to every pass.
        # NOTE(review): positional order is train, test, dev -- confirm it
        # matches what train.py expects.
        args=(
          t1-fs "${TRAIN}" "${TEST}" "${DEV}" "${TOKENIZER}"
          -o "${OUT_DIR}" -d "${D}"
          --attention "${ATT}" --position "${POS}"
          --epochs 10 --bs "${BS}" --device "${DEVICE}"
          --logdir "${TB_DIR}"
        )
        # 10# forces base 10; the leading zeros would otherwise be parsed as
        # octal. Passes after the first also pass the matching checkpoint file.
        if (( 10#${E} > 0 )); then
          args+=(--checkpoint "${OUT_DIR}/${T1_RUN_NAME}/checkpoint-${E}.tar")
        fi
        args+=(--height 1 --depth 1 --attention-scaling minmax)

        python train.py "${args[@]}"
      done
    done
  done
done
\ No newline at end of file
#!/bin/bash
#SBATCH -N 1
#SBATCH -p gpu
#SBATCH --gres gpu:rtx6000:1
#SBATCH --job-name t2_fs-minmax
#SBATCH --time 10-0
#SBATCH --mem 20G
#SBATCH -o logs/out-%j.txt
#SBATCH -e logs/err-%j.txt
#SBATCH --mail-type=ALL
#SBATCH --mail-user=gaetan.caillaut@univ-lemans.fr
#
# Slurm batch job: runs `train.py t2-fs` on the data/cleaned/t2 CSV files with
# `--attention-scaling minmax`, once for every combination of -d value,
# attention type and position type, over five consecutive passes of
# `--epochs 10`.

# Load conda's shell functions into this non-interactive shell, then activate
# the environment. Kept ahead of `set -e`, matching the original ordering.
eval "$(conda shell.bash hook)"
conda activate polysemy

# Input files.
TRAIN="data/cleaned/t2/train.csv"
DEV="data/cleaned/t2/dev.csv"
TEST="data/cleaned/t2/test.csv"
TOKENIZER="output/tokenizer.json"

# Output locations and run settings.
OUT_DIR="models/t2_fs/cleaned"
BS=256
DEVICE="cuda"
LOGDIR="runs/t2_fs/cleaned"

# `mkdir -p` is a no-op for directories that already exist.
mkdir -p "${OUT_DIR}" "${LOGDIR}"

# Prepend the minibert checkout; the separator is only added when PYTHONPATH
# already has a value, so no empty path entry is left behind.
export PYTHONPATH="/lium/raid01_b/gcaillaut/polysemy/minibert${PYTHONPATH:+:${PYTHONPATH}}"

set -x
set -e

# E takes the zero-padded values 00000, 00010, ..., 00040.
for E in $(seq -f "%05g" 0 10 40); do
  for D in 16 32 64; do
    for ATT in "self-attention" "non-transforming" "semi-transforming"; do
      for POS in "none" "fixed" "trained"; do
        T2_RUN_NAME="d${D}_${ATT}_${POS}_norm_minmax"
        TB_DIR="${LOGDIR}/${T2_RUN_NAME}"

        # Arguments common to every pass.
        # NOTE(review): positional order is train, test, dev -- confirm it
        # matches what train.py expects.
        args=(
          t2-fs "${TRAIN}" "${TEST}" "${DEV}" "${TOKENIZER}"
          -o "${OUT_DIR}" -d "${D}"
          --attention "${ATT}" --position "${POS}"
          --epochs 10 --bs "${BS}" --device "${DEVICE}"
          --logdir "${TB_DIR}"
        )
        # 10# forces base 10; the leading zeros would otherwise be parsed as
        # octal. Passes after the first also pass the matching checkpoint file.
        if (( 10#${E} > 0 )); then
          args+=(--checkpoint "${OUT_DIR}/${T2_RUN_NAME}/checkpoint-${E}.tar")
        fi
        args+=(--height 1 --depth 1 --attention-scaling minmax)

        python train.py "${args[@]}"
      done
    done
  done
done
\ No newline at end of file
#!/bin/bash
#SBATCH -N 1
#SBATCH -p gpu
#SBATCH --gres gpu:rtx6000:1
#SBATCH --job-name t2_fs-lemmatized-minmax
#SBATCH --time 10-0
#SBATCH --mem 20G
#SBATCH -o logs/out-%j.txt
#SBATCH -e logs/err-%j.txt
#SBATCH --mail-type=ALL
#SBATCH --mail-user=gaetan.caillaut@univ-lemans.fr
#
# Slurm batch job: runs `train.py t2-fs` on the data/lemmatized/t2 CSV files
# with `--attention-scaling minmax`, once for every combination of -d value,
# attention type and position type, over five consecutive passes of
# `--epochs 10`.

# Load conda's shell functions into this non-interactive shell, then activate
# the environment. Kept ahead of `set -e`, matching the original ordering.
eval "$(conda shell.bash hook)"
conda activate polysemy

# Input files.
TRAIN="data/lemmatized/t2/train.csv"
DEV="data/lemmatized/t2/dev.csv"
TEST="data/lemmatized/t2/test.csv"
# NOTE(review): the lemmatized MLM job scripts use
# output/tokenizer_lemmatized.json; confirm that tokenizer.json is really the
# intended tokenizer for lemmatized data here.
TOKENIZER="output/tokenizer.json"

# Output locations and run settings.
OUT_DIR="models/t2_fs/lemmatized"
BS=256
DEVICE="cuda"
LOGDIR="runs/t2_fs/lemmatized"

# `mkdir -p` is a no-op for directories that already exist.
mkdir -p "${OUT_DIR}" "${LOGDIR}"

# Prepend the minibert checkout; the separator is only added when PYTHONPATH
# already has a value, so no empty path entry is left behind.
export PYTHONPATH="/lium/raid01_b/gcaillaut/polysemy/minibert${PYTHONPATH:+:${PYTHONPATH}}"

set -x
set -e

# E takes the zero-padded values 00000, 00010, ..., 00040.
for E in $(seq -f "%05g" 0 10 40); do
  for D in 16 32 64; do
    for ATT in "self-attention" "non-transforming" "semi-transforming"; do
      for POS in "none" "fixed" "trained"; do
        T2_RUN_NAME="d${D}_${ATT}_${POS}_norm_minmax"
        TB_DIR="${LOGDIR}/${T2_RUN_NAME}"

        # Arguments common to every pass.
        # NOTE(review): positional order is train, test, dev -- confirm it
        # matches what train.py expects.
        args=(
          t2-fs "${TRAIN}" "${TEST}" "${DEV}" "${TOKENIZER}"
          -o "${OUT_DIR}" -d "${D}"
          --attention "${ATT}" --position "${POS}"
          --epochs 10 --bs "${BS}" --device "${DEVICE}"
          --logdir "${TB_DIR}"
        )
        # 10# forces base 10; the leading zeros would otherwise be parsed as
        # octal. Passes after the first also pass the matching checkpoint file.
        if (( 10#${E} > 0 )); then
          args+=(--checkpoint "${OUT_DIR}/${T2_RUN_NAME}/checkpoint-${E}.tar")
        fi
        args+=(--height 1 --depth 1 --attention-scaling minmax)

        python train.py "${args[@]}"
      done
    done
  done
done
\ No newline at end of file
#!/bin/bash
#SBATCH -N 1
#SBATCH -p gpu
#SBATCH --gres gpu:rtx6000:1
#SBATCH --job-name mlm-taylor
#SBATCH --time 10-0
#SBATCH --mem 20G
#SBATCH -o logs/out-%j.txt
#SBATCH -e logs/err-%j.txt
#SBATCH --mail-type=ALL
#SBATCH --mail-user=gaetan.caillaut@univ-lemans.fr
#
# Slurm batch job: runs `train.py mlm` on the data/cleaned/t1 CSV files with
# `--attention-scaling taylor`, once for every combination of -d value,
# attention type and position type, over five consecutive passes of
# `--epochs 10`.

# Load conda's shell functions into this non-interactive shell, then activate
# the environment. Both run before `set -e` is enabled further down.
eval "$(conda shell.bash hook)"
conda activate polysemy

# Input files.
TRAIN="data/cleaned/t1/train.csv"
DEV="data/cleaned/t1/dev.csv"
TEST="data/cleaned/t1/test.csv"
TOKENIZER="output/tokenizer.json"

# Output locations and run settings.
OUT_DIR="models/cleaned"
BS=256
DEVICE="cuda"
LOGDIR="runs/cleaned"

# `mkdir -p` leaves already-existing directories untouched.
mkdir -p "${OUT_DIR}" "${LOGDIR}"

export PYTHONPATH="/lium/raid01_b/gcaillaut/polysemy/minibert:${PYTHONPATH}"

set -x
set -e

# Pass tags: the zero-padded values 00000, 00010, 00020, 00030, 00040.
for epoch_tag in 00000 00010 00020 00030 00040; do
  for width in 16 32 64; do
    for att in "self-attention" "non-transforming" "semi-transforming"; do
      for pos in "none" "fixed" "trained"; do
        run_name="d${width}_${att}_${pos}_gelu_norm_taylor"
        tb_dir="${LOGDIR}/${run_name}"

        # NOTE(review): positional order is train, test, dev -- confirm it
        # matches what train.py expects.
        cmd=(
          python train.py mlm "${TRAIN}" "${TEST}" "${DEV}" "${TOKENIZER}"
          -o "${OUT_DIR}" -d "${width}" --bs "${BS}" --epochs 10
          --attention "${att}" --position "${pos}" --device "${DEVICE}"
        )
        # Every pass after the first also passes the checkpoint file named
        # after the current tag (presumably to resume training -- confirm).
        if [[ "${epoch_tag}" != "00000" ]]; then
          cmd+=(--checkpoint "${OUT_DIR}/${run_name}/checkpoint-${epoch_tag}.tar")
        fi
        cmd+=(--logdir "${tb_dir}" --height 1 --depth 1 --attention-scaling taylor)

        "${cmd[@]}"
      done
    done
  done
done
\ No newline at end of file
#!/bin/bash
#SBATCH -N 1
#SBATCH -p gpu
#SBATCH --gres gpu:rtx6000:1
#SBATCH --job-name mlm-lemmatized-taylor
#SBATCH --time 10-0
#SBATCH --mem 20G
#SBATCH -o logs/out-%j.txt
#SBATCH -e logs/err-%j.txt
#SBATCH --mail-type=ALL
#SBATCH --mail-user=gaetan.caillaut@univ-lemans.fr
#
# Slurm batch job: runs `train.py mlm` on the data/lemmatized/t1 CSV files
# with `--attention-scaling taylor`, once for every combination of -d value,
# attention type and position type, over five consecutive passes of
# `--epochs 10`.

# Load conda's shell functions into this non-interactive shell, then activate
# the environment. Both run before `set -e` is enabled further down.
eval "$(conda shell.bash hook)"
conda activate polysemy

# Input files.
TRAIN="data/lemmatized/t1/train.csv"
DEV="data/lemmatized/t1/dev.csv"
TEST="data/lemmatized/t1/test.csv"
TOKENIZER="output/tokenizer_lemmatized.json"

# Output locations and run settings.
OUT_DIR="models/lemmatized"
BS=256
DEVICE="cuda"
LOGDIR="runs/lemmatized"

# `mkdir -p` leaves already-existing directories untouched.
mkdir -p "${OUT_DIR}" "${LOGDIR}"

export PYTHONPATH="/lium/raid01_b/gcaillaut/polysemy/minibert:${PYTHONPATH}"

set -x
set -e

# Pass tags: the zero-padded values 00000, 00010, 00020, 00030, 00040.
for epoch_tag in 00000 00010 00020 00030 00040; do
  for width in 16 32 64; do
    for att in "self-attention" "non-transforming" "semi-transforming"; do
      for pos in "none" "fixed" "trained"; do
        run_name="d${width}_${att}_${pos}_gelu_norm_taylor"
        tb_dir="${LOGDIR}/${run_name}"

        # NOTE(review): positional order is train, test, dev -- confirm it
        # matches what train.py expects.
        cmd=(
          python train.py mlm "${TRAIN}" "${TEST}" "${DEV}" "${TOKENIZER}"
          -o "${OUT_DIR}" -d "${width}" --bs "${BS}" --epochs 10
          --attention "${att}" --position "${pos}" --device "${DEVICE}"
        )
        # Every pass after the first also passes the checkpoint file named
        # after the current tag (presumably to resume training -- confirm).
        if [[ "${epoch_tag}" != "00000" ]]; then
          cmd+=(--checkpoint "${OUT_DIR}/${run_name}/checkpoint-${epoch_tag}.tar")
        fi
        cmd+=(--logdir "${tb_dir}" --height 1 --depth 1 --attention-scaling taylor)

        "${cmd[@]}"
      done
    done
  done
done
\ No newline at end of file
#!/bin/bash
#SBATCH -N 1
#SBATCH -p gpu
#SBATCH --gres gpu:rtx6000:1
#SBATCH --job-name t1_fs-taylor
#SBATCH --time 10-0
#SBATCH --mem 20G
#SBATCH -o logs/out-%j.txt
#SBATCH -e logs/err-%j.txt
#SBATCH --mail-type=ALL
#SBATCH --mail-user=gaetan.caillaut@univ-lemans.fr
#
# Slurm batch job: runs `train.py t1-fs` on the data/cleaned/t1 CSV files with
# `--attention-scaling taylor`, once for every combination of -d value,
# attention type and position type, over five consecutive passes of
# `--epochs 10`.

# Load conda's shell functions into this non-interactive shell, then activate
# the environment. Kept ahead of `set -e`, matching the original ordering.
eval "$(conda shell.bash hook)"
conda activate polysemy

# Input files.
TRAIN="data/cleaned/t1/train.csv"
DEV="data/cleaned/t1/dev.csv"
TEST="data/cleaned/t1/test.csv"
TOKENIZER="output/tokenizer.json"

# Output locations and run settings.
OUT_DIR="models/t1_fs/cleaned"
BS=256
DEVICE="cuda"
LOGDIR="runs/t1_fs/cleaned"

# `mkdir -p` is a no-op for directories that already exist.
mkdir -p "${OUT_DIR}" "${LOGDIR}"

# Prepend the minibert checkout; the separator is only added when PYTHONPATH
# already has a value, so no empty path entry is left behind.
export PYTHONPATH="/lium/raid01_b/gcaillaut/polysemy/minibert${PYTHONPATH:+:${PYTHONPATH}}"

set -x
set -e

# E takes the zero-padded values 00000, 00010, ..., 00040.
for E in $(seq -f "%05g" 0 10 40); do
  for D in 16 32 64; do
    for ATT in "self-attention" "non-transforming" "semi-transforming"; do
      for POS in "none" "fixed" "trained"; do
        T1_RUN_NAME="d${D}_${ATT}_${POS}_norm_taylor"
        TB_DIR="${LOGDIR}/${T1_RUN_NAME}"

        # Arguments common to every pass.
        # NOTE(review): positional order is train, test, dev -- confirm it
        # matches what train.py expects.
        args=(
          t1-fs "${TRAIN}" "${TEST}" "${DEV}" "${TOKENIZER}"
          -o "${OUT_DIR}" -d "${D}"
          --attention "${ATT}" --position "${POS}"
          --epochs 10 --bs "${BS}" --device "${DEVICE}"
          --logdir "${TB_DIR}"
        )
        # 10# forces base 10; the leading zeros would otherwise be parsed as
        # octal. Passes after the first also pass the matching checkpoint file.
        if (( 10#${E} > 0 )); then
          args+=(--checkpoint "${OUT_DIR}/${T1_RUN_NAME}/checkpoint-${E}.tar")
        fi
        args+=(--height 1 --depth 1 --attention-scaling taylor)

        python train.py "${args[@]}"
      done
    done
  done
done
\ No newline at end of file
#!/bin/bash
#SBATCH -N 1
#SBATCH -p gpu
#SBATCH --gres gpu:rtx6000:1
#SBATCH --job-name t1_fs-lemmatized-taylor
#SBATCH --time 10-0
#SBATCH --mem 20G
#SBATCH -o logs/out-%j.txt
#SBATCH -e logs/err-%j.txt
#SBATCH --mail-type=ALL
#SBATCH --mail-user=gaetan.caillaut@univ-lemans.fr
#
# Slurm batch job: runs `train.py t1-fs` on the data/lemmatized/t1 CSV files
# with `--attention-scaling taylor`, once for every combination of -d value,
# attention type and position type, over five consecutive passes of
# `--epochs 10`.

# Load conda's shell functions into this non-interactive shell, then activate
# the environment. Kept ahead of `set -e`, matching the original ordering.
eval "$(conda shell.bash hook)"
conda activate polysemy

# Input files.
TRAIN="data/lemmatized/t1/train.csv"
DEV="data/lemmatized/t1/dev.csv"
TEST="data/lemmatized/t1/test.csv"
# NOTE(review): the lemmatized MLM job scripts use
# output/tokenizer_lemmatized.json; confirm that tokenizer.json is really the
# intended tokenizer for lemmatized data here.
TOKENIZER="output/tokenizer.json"

# Output locations and run settings.
OUT_DIR="models/t1_fs/lemmatized"
BS=256
DEVICE="cuda"
LOGDIR="runs/t1_fs/lemmatized"

# `mkdir -p` is a no-op for directories that already exist.
mkdir -p "${OUT_DIR}" "${LOGDIR}"

# Prepend the minibert checkout; the separator is only added when PYTHONPATH
# already has a value, so no empty path entry is left behind.
export PYTHONPATH="/lium/raid01_b/gcaillaut/polysemy/minibert${PYTHONPATH:+:${PYTHONPATH}}"

set -x
set -e

# E takes the zero-padded values 00000, 00010, ..., 00040.
for E in $(seq -f "%05g" 0 10 40); do
  for D in 16 32 64; do
    for ATT in "self-attention" "non-transforming" "semi-transforming"; do
      for POS in "none" "fixed" "trained"; do
        T1_RUN_NAME="d${D}_${ATT}_${POS}_norm_taylor"
        TB_DIR="${LOGDIR}/${T1_RUN_NAME}"

        # Arguments common to every pass.
        # NOTE(review): positional order is train, test, dev -- confirm it
        # matches what train.py expects.
        args=(
          t1-fs "${TRAIN}" "${TEST}" "${DEV}" "${TOKENIZER}"
          -o "${OUT_DIR}" -d "${D}"
          --attention "${ATT}" --position "${POS}"
          --epochs 10 --bs "${BS}" --device "${DEVICE}"
          --logdir "${TB_DIR}"
        )
        # 10# forces base 10; the leading zeros would otherwise be parsed as
        # octal. Passes after the first also pass the matching checkpoint file.
        if (( 10#${E} > 0 )); then
          args+=(--checkpoint "${OUT_DIR}/${T1_RUN_NAME}/checkpoint-${E}.tar")
        fi
        args+=(--height 1 --depth 1 --attention-scaling taylor)

        python train.py "${args[@]}"
      done
    done
  done
done
\ No newline at end of file
#!/bin/bash
#SBATCH -N 1
#SBATCH -p gpu
#SBATCH --gres gpu:rtx6000:1
#SBATCH --job-name t2_fs-taylor
#SBATCH --time 10-0
#SBATCH --mem 20G
#SBATCH -o logs/out-%j.txt
#SBATCH -e logs/err-%j.txt
#SBATCH --mail-type=ALL
#SBATCH --mail-user=gaetan.caillaut@univ-lemans.fr
#
# Slurm batch job preamble: environment, input/output paths and run settings
# for the data/cleaned/t2 CSV files.

# Load conda's shell functions into this non-interactive shell, then activate
# the environment. Kept ahead of `set -e`, matching the original ordering.
eval "$(conda shell.bash hook)"
conda activate polysemy

# Input files.
TRAIN="data/cleaned/t2/train.csv"
DEV="data/cleaned/t2/dev.csv"
TEST="data/cleaned/t2/test.csv"
TOKENIZER="output/tokenizer.json"

# Output locations and run settings.
PRETRAINED_DIR="models/cleaned"
OUT_DIR="models/t2_fs/cleaned"
BS=256
DEVICE="cuda"
LOGDIR="runs/t2_fs/cleaned"

# `mkdir -p` is a no-op for directories that already exist.
mkdir -p "${OUT_DIR}" "${LOGDIR}"

# Prepend the minibert checkout; the separator is only added when PYTHONPATH
# already has a value, so no empty path entry is left behind.
export PYTHONPATH="/lium/raid01_b/gcaillaut/polysemy/minibert${PYTHONPATH:+:${PYTHONPATH}}"

set -x
set -e
for E in $(seq -f "%05g" 0 10 40); do
for D in 16 32 64; do
for ATT in "self-attention" "non-transforming" "semi-transforming"; do
for POS in "none" "fixed" "trained"; do
MLM_RUN_NAME="d${D}_${ATT}_${POS}_gelu_norm_taylor"
T2_RUN_NAME="d${D}_${ATT}_${POS}_norm_taylor"
TB_DIR="${LOGDIR}/${T2_RUN_NAME}"