Commit 505d4158 authored by Colleen Beaumard

Add 95% confidence interval for scoring.py and scoring_cross_validation.py

parent 3096a152
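
For context, the quantity this commit adds is the half-width of the normal-approximation (Wald) 95% confidence interval for a proportion, 1.96 * sqrt(p(1-p)/n). A minimal sketch of the computation, with made-up numbers (the UAR of 0.62 and n = 1241 below are illustrative, not taken from this repo):

```python
from math import sqrt

def wald_half_width_95(p, n):
    """Half-width of the 95% normal-approximation (Wald) confidence
    interval for a proportion p estimated from n samples."""
    return 1.96 * sqrt(p * (1 - p) / n)

# Illustrative values only: a UAR of 0.62 over 1241 test utterances.
print(round(wald_half_width_95(0.62, 1241), 2))  # 0.03, i.e. 62% ± 3 points
```

Two caveats: the Wald interval strictly applies to a binomial proportion, while UAR is a macro-averaged recall, so the printed ± figure is an approximation; and the scripts print UARPercent (a percentage) next to conf_inter (a fraction of 1), so multiplying the half-width by 100 before printing would put both on the same scale.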
--- a/scoring.py
+++ b/scoring.py
@@ -5,6 +5,7 @@ import re
 import seaborn as sns
 import pandas as pd
 import numpy as np
+from math import sqrt
 import torchaudio
 import sklearn.metrics as metrics
 import matplotlib.pyplot as plt
@@ -54,11 +55,10 @@ ses_nb = args.session_test
 labels = list(args.emotions.split(" "))
 nb_batch = str(args.batchs)
 model_type = args.model
-if "(" in model_type:
-    model_type, model_2nd = model_type.split("(")[0], "-" + model_type.split("(")[1]
+if "-" in model_type:
+    model_type, model_2nd = model_type.split("-")[0], "-" + model_type.split("-")[1]
 else:
     model_2nd = ""
 lr = str(float(args.lr))
 cates = str(args.categories)
 if args.freeze is not None:
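
With the separator change above, a hyphenated model name is split into a base type and a suffix. A quick sketch with a hypothetical name (not one taken from this repo):

```python
model_type = "wavlm-large"  # hypothetical value of args.model
if "-" in model_type:
    # split("-")[1] keeps only the segment after the first hyphen, so a
    # name like "foo-bar-baz" would lose "baz".
    model_type, model_2nd = model_type.split("-")[0], "-" + model_type.split("-")[1]
print(model_type, model_2nd)  # wavlm -large
```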
@@ -71,6 +71,7 @@ else:
     xtract, config = load_model("model_{}/best_{}_{}emo_{}batch_lr-{}_Test-IEMOCAP{}.pt"
                                 .format(model_type, model_type, cates, nb_batch, lr, ses_nb), "cuda")
+    model_2nd += "_{}".format(config["model_archi"]["loss"]["type"])
 predictions = []
 gold_anno = []
 path = "data/IEMOCAP/Session{}/sentences/wav".format(ses_nb)
@@ -122,8 +123,7 @@ assert len(predictions) == len(gold_anno)
 # We start to compare the predictions and gold_anno lists
 UAR = metrics.recall_score(gold_anno, predictions, average="macro")
-UARPercent = round(UAR * 100, 2)
-print("\nUAR:", UARPercent, "%\n")
+p = round(UAR, 2) # For the confidence interval
 confMatrix = metrics.confusion_matrix(gold_anno, predictions)
 print(confMatrix, "\n")
@@ -133,7 +133,14 @@ gold_dic = []
 for i in range(len(confMatrix)):
     gold_dic.append(sum(confMatrix[i]))
+n = sum(gold_dic) # For the confidence interval
+conf_inter = round((1.96*sqrt((p*(1-p))/n)), 2) # 95% confidence interval
+UARPercent = round(UAR * 100, 2)
+print("\nUAR:", UARPercent, "% ±", conf_inter, "\n")
 [print("Total", labels[i], ":", gold_dic[i]) for i in range(len(labels))]
 print("Total:", n)
 annot = []
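
The n that feeds the interval is the test-set size, recovered from the confusion matrix: with sklearn, row i of metrics.confusion_matrix holds the utterances whose gold label is class i, so each row sum is a per-class gold count and their total is n. A self-contained check on toy labels (not IEMOCAP data):

```python
import sklearn.metrics as metrics

gold = ["ang", "ang", "hap", "hap", "hap", "neu"]  # toy gold labels
pred = ["ang", "hap", "hap", "hap", "neu", "neu"]  # toy predictions

cm = metrics.confusion_matrix(gold, pred)  # rows follow the sorted gold classes
gold_counts = cm.sum(axis=1)               # array([2, 3, 1]), per-class totals
n = int(gold_counts.sum())                 # 6 == len(gold)
print(gold_counts, n)
```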
@@ -159,7 +166,8 @@ fil.close()
 # Search for "Loss:", "Validation Loss", "Epoch" and "reducing" in all lines
 valid_loss = [line for line in file if "Validation Loss" in line]
-lr_scheduler = [float(line.rsplit(":",1)[1].replace("\n","")) for line in file if "Scheduer" in line]
+lr_scheduler = [float(line.rsplit(":",1)[1].replace("\n","")) for line in file if "Scheduler" in line]
 # "Loss:" and "Epoch" in same line
 if model_type == "custom":
     loss_epoch = [line for line in file if "Epoch" in line]
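
The corrected filter matches the fixed "Scheduler" spelling written to the training logs. Assuming a log line shaped like the hypothetical one below (the real format comes from the training script), the rsplit extracts the learning rate after the last colon:

```python
line = "Epoch 12 - Scheduler: 0.0001\n"  # hypothetical log line
lr = float(line.rsplit(":", 1)[1].replace("\n", ""))
print(lr)  # 0.0001
```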
@@ -256,7 +264,7 @@ ax.set_xlabel("Epochs")
 ax.set_ylabel("CrossEntropyLoss()")
 twin.set_ylabel("Validation accuracy (%)")
 twin.set_ylim(0, 100)
-ax.set_ylim(0,9)
+ax.set_ylim(0,max(vloss)+1)
 ax.set_xlim(0, len(aepoch))
 ax.legend(handles=handles)
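
The last change scales the loss axis to the parsed validation losses instead of a hard-coded cap of 9, so the plot stays readable when losses exceed that cap. A minimal matplotlib sketch with made-up values for vloss:

```python
import matplotlib.pyplot as plt

vloss = [4.2, 2.9, 1.7, 1.3]      # hypothetical validation losses per epoch
fig, ax = plt.subplots()
ax.plot(vloss, label="Validation loss")
ax.set_ylim(0, max(vloss) + 1)    # adapt the axis to the data
ax.set_xlim(0, len(vloss))
ax.legend()
plt.show()
```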
--- a/scoring_cross_validation.py
+++ b/scoring_cross_validation.py
@@ -2,6 +2,7 @@ import torch
 from tqdm import tqdm
 import os
 import seaborn as sns
+from math import sqrt
 import pandas as pd
 import numpy as np
 import torchaudio
@@ -137,8 +138,8 @@ assert len(predictions) == len(gold_anno)
 # We start to compare the predictions and gold_anno lists
 UAR = metrics.recall_score(gold_anno, predictions, average="macro")
+p = round(UAR, 2) # For confidence interval
 UARPercent = round(UAR * 100, 2)
-print("UAR:", UARPercent, "%\n")
 confMatrix = metrics.confusion_matrix(gold_anno, predictions)
 print(confMatrix)
@@ -147,8 +148,13 @@ gold_dic = []
 for i in range(len(confMatrix)):
     gold_dic.append(sum(confMatrix[i]))
+n = sum(gold_dic) # For confidence interval
+conf_inter = round((1.96*sqrt((p*(1-p))/n)), 2) # 95% confidence interval
+print("\nUAR:", UARPercent, "% ±", conf_inter, "\n")
 [print("Total", labels[i], ":", gold_dic[i]) for i in range(len(labels))]
 print("Total", n)
 annot = []