Commit 13e5878a authored by Valentin Pelloin
Browse files

confidence interval for similarity evaluation

parent 66ada874
......@@ -627,8 +627,8 @@ class svd2vec:
msim = self.similarity(w1, w2)
x.append(hsim)
y.append(msim)
pearson = pearsonr(np.array(x), np.array(y))
return pearson
pearson, p_value, low, high = Utils.confidence_pearson(np.array(x), np.array(y))
return pearson, p_value, (low, high)
def evaluate_word_analogies(self, analogies, section_separator=":"):
......
......@@ -2,6 +2,7 @@
import sys
import random
import numpy as np
from scipy import stats
import cProfile
import pstats
......@@ -85,3 +86,13 @@ class Utils:
return result
return profiled_func
def confidence_pearson(x, y, alpha=0.05):
    """Return Pearson's r, its p-value and a confidence interval for r.

    The interval is built with the Fisher z-transformation: r is mapped
    through arctanh, a normal interval of half-width ``z * se`` is taken
    around it, and the bounds are mapped back through tanh.

    Parameters
    ----------
    x, y : array-like
        Paired samples of equal length. They are converted with
        ``np.asarray`` so plain sequences are accepted as well.
    alpha : float, optional
        Significance level; the interval has confidence ``1 - alpha``
        (0.05 gives a 95% interval).

    Returns
    -------
    tuple
        ``(r, p, lo, hi)``: the correlation coefficient, the p-value
        reported by ``scipy.stats.pearsonr``, and the lower and upper
        bounds of the confidence interval.
    """
    # thanks to https://zhiyzuo.github.io/Pearson-Correlation-CI-in-Python/
    x = np.asarray(x)
    y = np.asarray(y)
    r, p = stats.pearsonr(x, y)
    # Fisher transformation of the correlation coefficient.
    r_z = np.arctanh(r)
    # Standard error used for the transformed value: 1 / sqrt(n - 3).
    se = 1 / np.sqrt(x.size - 3)
    # Two-sided critical value of the standard normal distribution.
    z = stats.norm.ppf(1 - alpha / 2)
    lo_z, hi_z = r_z - z * se, r_z + z * se
    # Map the interval bounds back to the correlation scale.
    lo, hi = np.tanh((lo_z, hi_z))
    # Callers unpack four values (r, p-value, low, high), so the
    # coefficient and p-value are returned along with the interval.
    return r, p, lo, hi
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment