Commit 1b97680b authored by Marie Tahon's avatar Marie Tahon
Browse files

modifications on TF_differential.py and addition of delta chrom visualization...

modifications on TF_differential.py and addition of delta chrom visualization in spectral_clustering_audio.py
parent 06c3b71b
Pipeline #522 canceled with stages
......@@ -14,51 +14,161 @@ import sys, os
import librosa
import librosa.display
import argparse
import params
import warnings
warnings.filterwarnings(action="ignore", module="scipy", message="^internal gelsd")
BINS_PER_OCTAVE = 12*3
N_OCTAVES = 7
nfft = int(1024*2)
step = int(nfft /2)
if len(sys.argv) < 2:
sys.exit('no audio file')
if len(sys.argv) < 3:
start = 0.0
else:
start = sys.argv[2]
if len(sys.argv) < 4:
duration = None
else:
duration = sys.argv[3]
y, sr = librosa.load(sys.argv[1], offset=start, duration = duration)
## Calculation of differential Fourier Transform
D_delta = librosa.feature.delta(D**2)
D_time = librosa.istft(np.sqrt(np.abs(D_delta)) * np.exp(np.angle(D)), hop_length = step)
t = np.linspace(0, y.shape[0]*sr, D_time.shape[0])
ty = np.linspace(0, y.shape[0]*sr, y.shape[0])
fig_s = plt.figure(figsize=(12,4))
ax_s0 = fig_s.add_subplot(2,1,1)
librosa.display.specshow(librosa.amplitude_to_db(D, ref=np.max), y_axis='log', sr=sr, hop_length = step, x_axis='time')
ax_s0.set_title('original CQT spectrogram')
ax_s1 = fig_s.add_subplot(2,1,2)
librosa.display.specshow(librosa.amplitude_to_db(D_delta, ref=np.max), y_axis='log', sr=sr, hop_length = step, x_axis='time')
ax_s1.set_title('Differential CQT spectrogram')
plt.tight_layout()
plt.figure()
plt.plot(t, D_time, 'r')
plt.plot(ty, y, alpha=0.5)
plt.xlabel('time (sec.)')
plt.ylabel('amplitude (UA)')
plt.legend(('differential TF', 'original'), loc='upper right')
plt.title('Differential TF')
plt.show()
# Analysis constants are taken from the shared params module so that every
# script uses the same settings.
BINS_PER_OCTAVE = params.BINS_PER_OCTAVE
N_OCTAVES = params.N_OCTAVES
# params.STEP is defined as NFFT / 2, which is a float under Python 3;
# window and hop sizes are cast to int before being handed to librosa.
NFFT = int(params.NFFT)
STEP = int(params.STEP)
#######################################
def detect_onsets(y, sr, M):
    """Detect onsets in M and aggregate M between consecutive onsets.

    Parameters
    ----------
    y : audio time series. Not used here; kept so that the signature is
        parallel to detect_beats.
    sr : sampling rate of the analysed signal.
    M : 2-D matrix with one column per analysis frame (hop of STEP samples).

    Returns
    -------
    onset_raw : onset frame indexes exactly as returned by
        librosa.onset.onset_detect (frame 0 and the final frame are NOT added).
    onset_times : times in seconds of the onset frames completed with frame 0
        and the final frame M.shape[1]-1.
    Msync : M aggregated with the median between consecutive onsets.
    """
    # Onset strength envelope computed directly from the matrix M.
    oenv = librosa.onset.onset_strength(S=M, sr=sr)
    # Detect events without backtracking. sr and hop_length are passed
    # explicitly: librosa derives its default peak-picking windows from them,
    # and its own defaults (22050 Hz, 512 samples) do not match STEP.
    onset_raw = librosa.onset.onset_detect(
        onset_envelope=oenv, sr=sr, hop_length=STEP, backtrack=False)
    # fix_frames adds the non-onset frames 0 and M.shape[1]-1 (final frame);
    # only the time stamps use this padded version.
    onset_frames = librosa.util.fix_frames(onset_raw, x_min=0, x_max=M.shape[1] - 1)
    onset_times = librosa.frames_to_time(onset_frames, sr=sr, hop_length=STEP)
    # To reduce dimensionality, M is made onset-synchronous.
    # NOTE(review): synchronisation uses onset_raw while onset_times uses the
    # padded onset_frames -- confirm that callers expect this asymmetry.
    Msync = librosa.util.sync(M, onset_raw, aggregate=np.median)
    return onset_raw, onset_times, Msync
##############################################
def detect_beats(y, sr, M):
    """Track beats in y and aggregate M between consecutive beats.

    Parameters
    ----------
    y : audio time series handed to librosa.beat.beat_track.
    sr : sampling rate of y.
    M : 2-D matrix with one column per analysis frame (hop of STEP samples).

    Returns
    -------
    beats_frames : beat frame indexes completed with frame 0 and the final
        frame M.shape[1]-1.
    beat_times : times in seconds of beats_frames.
    Msync : M aggregated with the median between consecutive beats_frames.

    The estimated tempo and beat-period statistics are printed to stdout.
    """
    tempo, beats = librosa.beat.beat_track(y=y, sr=sr, hop_length=STEP, trim=False)
    # Depending on the librosa release the tempo is a float or a one-element
    # array; an array cannot be rendered with a float format spec, so it is
    # reduced to a plain scalar first.
    tempo = float(np.ravel(tempo)[0])
    print('Detected tempo: {0:.2f} bpm'.format(tempo))
    beat_period = np.diff(librosa.frames_to_time(beats, sr=sr, hop_length=STEP))
    # NOTE(review): the first value printed is 60 / mean(period), i.e. a tempo
    # in bpm, although the label says "mean beat period".
    print('mean beat period: {0:.2f} ; std beat period: {1:.2f}'.format(60/np.mean(beat_period), np.std(beat_period)))
    # fix_frames adds the non-beat frames 0 and M.shape[1]-1 (final frame).
    beats_frames = librosa.util.fix_frames(beats, x_min=0, x_max=M.shape[1] - 1)
    beat_times = librosa.frames_to_time(beats_frames, sr=sr, hop_length=STEP)
    Msync = librosa.util.sync(M, beats_frames, aggregate=np.median)
    return beats_frames, beat_times, Msync
##############################################
def no_onsets(sr, M):
    """Treat every analysis frame of M as an onset (no synchronisation).

    Returns the frame indexes 0 .. M.shape[1]-1, their time stamps and M
    itself, unchanged.
    """
    n_frames = M.shape[1]
    frame_idx = np.arange(0, n_frames)
    # One frame every STEP samples is the same as sampling at sr/STEP frames
    # per second, hence the scaled rate given to samples_to_time.
    frame_rate = sr / STEP
    frame_times = librosa.samples_to_time(frame_idx, sr=frame_rate)
    return frame_idx, frame_times, M
def get_manual_beats(sr, M, filename):
    """Read manually annotated onsets from a text file and synchronise M on them.

    Parameters
    ----------
    sr : sampling rate of the analysed signal.
    M : 2-D matrix with one column per analysis frame (hop of STEP samples).
    filename : path of the annotation file. The first line is skipped
        (presumably a header -- confirm against the annotation format); every
        other non-blank line must hold one number, converted to a frame index
        as value * sr / STEP (i.e. presumably a time in seconds).

    Returns
    -------
    onsets : annotated frame indexes completed with frame 0 and the final
        frame M.shape[1]-1.
    onset_times : times in seconds of onsets.
    Msync : M aggregated with the median between consecutive onsets.
    """
    with open(filename, 'r') as f:
        annotation_lines = f.readlines()[1:]
    # Blank lines (for instance trailing ones) are ignored instead of making
    # float() fail on an empty string.
    times = np.array([float(line) for line in annotation_lines if line.strip()])
    # Truncation toward zero, as with int() applied to each value.
    frames = (times * sr / STEP).astype(int)
    onsets = librosa.util.fix_frames(frames, x_min=0, x_max=M.shape[1] - 1)
    onset_times = librosa.frames_to_time(onsets, sr=sr, hop_length=STEP)
    Msync = librosa.util.sync(M, onsets, aggregate=np.median)
    return onsets, onset_times, Msync
def extract_onsets(y, sr, C, manual_opt):
    """Synchronise the matrix C on the segmentation selected by params.onset.

    Parameters
    ----------
    y : audio time series (used by the 'onset' and 'beat' methods).
    sr : sampling rate of y.
    C : 2-D time-frequency matrix computed by the caller, one column per
        analysis frame (hop of STEP samples).
    manual_opt : path of the manual annotation file, only read when
        params.onset == 'manual'.

    Returns
    -------
    onset : vector of segment boundary frame indexes.
    onset_times : vector of boundary times in seconds.
    Csync : C synchronised on the boundaries (C itself for method 'no').

    Exits the program with an error message when params.onset is not one of
    'no', 'onset', 'beat' or 'manual'.
    """
    method = params.onset
    # To reduce dimensionality, C is made onset-synchronous.
    if method == 'no':
        onset, onset_times, Csync = no_onsets(sr, C)
    elif method == 'onset':
        onset, onset_times, Csync = detect_onsets(y, sr, C)
    elif method == 'beat':
        onset, onset_times, Csync = detect_beats(y, sr, C)
    elif method == 'manual':
        onset, onset_times, Csync = get_manual_beats(sr, C, manual_opt)
    else:
        # Passing the message to sys.exit writes it to stderr and yields a
        # non-zero exit status, so a bad configuration is reported as a failure.
        sys.exit('onset parameter is not well-defined')
    return onset, onset_times, Csync
def plot_spectrograms(D, Dd, sr):
    """Draw D and Dd, both converted to dB, one above the other in a new figure.

    D is shown through its absolute value; Dd is passed to amplitude_to_db
    as it is. Both panels use a log frequency axis and a time axis built
    from sr and STEP.
    """
    figure = plt.figure(figsize=(12, 4))
    panels = (
        (np.abs(D), 'original CQT spectrogram'),
        (Dd, 'Differential CQT spectrogram'),
    )
    for row, (values, title) in enumerate(panels, start=1):
        # add_subplot makes the new axes current, which is where specshow draws
        axes = figure.add_subplot(2, 1, row)
        level_db = librosa.amplitude_to_db(values, ref=np.max)
        librosa.display.specshow(level_db, y_axis='log', sr=sr, hop_length=STEP, x_axis='time')
        axes.set_title(title)
    plt.tight_layout()
def plot_wavforms(t, y, td, yd):
    """Overlay the differential waveform (td, yd) and the original one (t, y).

    The differential signal is drawn first, in red; the original is drawn on
    top of it, half transparent. A new figure is created for the plot.
    """
    figure = plt.figure()
    axes = figure.gca()
    axes.plot(td, yd, 'r')
    axes.plot(t, y, alpha=0.5)
    axes.set_xlabel('time (sec.)')
    axes.set_ylabel('amplitude (UA)')
    # legend entries follow the drawing order above
    axes.legend(('differential TF', 'original'), loc='upper right')
    axes.set_title('Differential TF')
    figure.tight_layout()
def main():
    """Compute and display the differential transform of an audio file.

    Command line: the audio file name and, optionally, a file of manual onset
    annotations (read when params.onset == 'manual'). The excerpt that is
    loaded is controlled by params.begin and params.duration.

    Steps: STFT magnitude/phase, synchronisation of the magnitude on the
    segmentation chosen in params, delta of the synchronised magnitude,
    placement of each delta column back on its onset frame, inverse STFT with
    the original phase, and plots of the spectrograms and waveforms.
    """
    parser = argparse.ArgumentParser(description='Computation and visualisation of differential transform based on synchronous beats.')
    parser.add_argument('filename', type=str, help='name of audio file')
    parser.add_argument('manual_onset', nargs='?', type=str, help='name of the file containing manual annotations for onset timestamps (with method=manual)')
    args = parser.parse_args()

    y, sr = librosa.load(args.filename, offset=params.begin, duration=params.duration)

    ## Calculation of Fourier Transform
    D_mag, D_phase = librosa.core.magphase(librosa.stft(y, n_fft=NFFT, hop_length=STEP))

    ## synchronisation on onsets
    onset_ech, onset_times, Dsync = extract_onsets(y, sr, D_mag, args.manual_onset)

    ## Calculation of differential Fourier Transform
    Dsync_delta = librosa.feature.delta(Dsync)

    ## back synchronisation on frames: the k-th distinct onset frame (in
    ## increasing order) receives the k-th column of Dsync_delta; every other
    ## frame stays at zero.
    D_delta = np.zeros(D_mag.shape)
    onset_cols = np.unique(onset_ech).astype(int)
    onset_cols = onset_cols[(onset_cols >= 0) & (onset_cols < D_mag.shape[1])]
    D_delta[:, onset_cols] = Dsync_delta[:, :onset_cols.size]

    plot_spectrograms(D_mag, D_delta, sr)

    # The differential magnitude is recombined with the original phase before
    # going back to the time domain.
    D_time = librosa.istft((D_delta * D_phase), hop_length=STEP)
    print(D_mag.shape, D_delta.shape, Dsync.shape, Dsync_delta.shape, D_time.shape, D_phase.shape)

    # Time axes in seconds: sample index divided by the sampling rate. Both
    # signals are sampled at sr, even when their lengths differ.
    t = np.arange(y.shape[0]) / sr
    t_delta = np.arange(D_time.shape[0]) / sr

    plot_wavforms(t, y, t_delta, D_time)
    plt.show()


if __name__ == '__main__':
    main()
......@@ -7,12 +7,12 @@ NFFT = 2 ** 11 #(> 2**10) duration of analysis window in samples for feature ext
STEP = NFFT / 2 #(>2**6) et (STEP < NFFT) 50% overlap between time windows / also sub-frequency after analyzing spectral structure.
#dimensionality reduction synchronisation on downsampled time (mean aggregate function)
onset = 'beat' #onsets that are extracted: no (regular onset extraction), 'onset' (onset extraction) 'beat' (beat extraction)
onset = 'beat' #onsets that are extracted: no (regular onset extraction), 'onset' (onset extraction) 'beat' (beat extraction), 'manual' (manual annotations of onsets)
onset_percu = False #extract onsets on percussive part of the signal. Also create percu and harmonic wav files.
onset_plot = True
#features
feat = ['spectral'] #feat = ['spectral', 'chroma', 'cepstral']
feat = ['chroma'] #feat = ['spectral', 'chroma', 'cepstral']
#compute spectral features: centroid, flatness, rolloff (5, 25, 50, 75, 95), contrast
#compute chroma features: N_OCTAVES chroma + N_OCTAVES Dchroma
#compute cepstral features: 20MFCC + 20DMFCC
......@@ -26,7 +26,8 @@ cluster_dist = True # add cosine distance between clusters on final plot
cluster_nb_max = 5 #maximum nb of clusters in 1 sec.
#plots
plot_simi = True
plot_struct = True
plot_dist = True
plot_simi = False
plot_struct = False
plot_dist = False
plot_features = True
timestamps = True
......@@ -74,7 +74,7 @@ STEP = int(params.STEP)
#######################################
def detect_onsets(y, sr, M, plot_opt):
def detect_onsets(y, sr, M):
#detect onsets
oenv = librosa.onset.onset_strength(S=M, sr=sr)
# Detect events without backtracking
......@@ -87,7 +87,7 @@ def detect_onsets(y, sr, M, plot_opt):
# To reduce dimensionality, we'll beat-synchronous the CQT
Msync = librosa.util.sync(M, onset_raw, aggregate=np.median)
if plot_opt:
if params.onset_plot:
plt.figure(figsize=(12, 4))
plt.plot(oenv, label='Onset strength')
plt.vlines(onset_raw, 0, oenv.max(), label='Raw onsets')
......@@ -111,7 +111,7 @@ def detect_onsets(y, sr, M, plot_opt):
##############################################
def detect_beats(y, sr, M, plot_opt):
def detect_beats(y, sr, M):
tempo, beats = librosa.beat.beat_track(y=y, sr=sr, hop_length = STEP, trim=False)
print('Detected tempo: {0:.2f} bpm'.format(tempo))
beat_period = np.diff(librosa.frames_to_time(beats, sr=sr, hop_length= STEP))
......@@ -121,7 +121,7 @@ def detect_beats(y, sr, M, plot_opt):
beat_times = librosa.frames_to_time(beats_frames, sr=sr, hop_length = STEP)
Msync = librosa.util.sync(M, beats_frames, aggregate=np.median)
if plot_opt:
if params.onset_plot:
plt.figure(figsize=(12, 4))
plt.subplot(2,1,1)
plt.title('CQT spectrogram')
......@@ -137,12 +137,12 @@ def detect_beats(y, sr, M, plot_opt):
return beats_frames, beat_times, Msync
##############################################
def no_onsets(y, sr, M, plot_opt):
def no_onsets(sr, M):
onsets = np.arange(0, M.shape[1])
onset_times = librosa.samples_to_time(onsets, sr=sr/STEP)
if plot_opt:
if params.onset_plot:
plt.figure(figsize=(12, 4))
plt.title('CQT spectrogram')
librosa.display.specshow(M, y_axis='cqt_hz', sr=sr, bins_per_octave=BINS_PER_OCTAVE, x_axis='time', x_coords=onset_times)
......@@ -151,8 +151,33 @@ def no_onsets(y, sr, M, plot_opt):
return onsets, onset_times, M
def extract_onsets(method, y, sr, plot_opt):
def get_manual_beats(sr, M, filename):
    """Read manually annotated onsets from a text file and synchronise M on them.

    Parameters
    ----------
    sr : sampling rate of the analysed signal.
    M : 2-D matrix with one column per analysis frame (hop of STEP samples).
    filename : path of the annotation file. The first line is skipped
        (presumably a header -- confirm against the annotation format); every
        other non-blank line must hold one number, converted to a frame index
        as value * sr / STEP (i.e. presumably a time in seconds).

    Returns
    -------
    onsets : annotated frame indexes completed with frame 0 and the final
        frame M.shape[1]-1.
    onset_times : times in seconds of onsets.
    Msync : M aggregated with the median between consecutive onsets.

    When params.onset_plot is set, M and Msync are also drawn in a new figure.
    """
    with open(filename, 'r') as f:
        annotation_lines = f.readlines()[1:]
    # Blank lines (for instance trailing ones) are ignored instead of making
    # float() fail on an empty string.
    times = np.array([float(line) for line in annotation_lines if line.strip()])
    # Truncation toward zero, as with int() applied to each value.
    frames = (times * sr / STEP).astype(int)
    onsets = librosa.util.fix_frames(frames, x_min=0, x_max=M.shape[1] - 1)
    onset_times = librosa.frames_to_time(onsets, sr=sr, hop_length=STEP)
    Msync = librosa.util.sync(M, onsets, aggregate=np.median)
    if params.onset_plot:
        plt.figure(figsize=(12, 4))
        plt.subplot(2, 1, 1)
        plt.title('CQT spectrogram')
        librosa.display.specshow(M, y_axis='cqt_hz', sr=sr, hop_length=STEP, bins_per_octave=BINS_PER_OCTAVE, x_axis='time')
        plt.tight_layout()
        plt.subplot(2, 1, 2)
        plt.title('CQT spectrogram synchronized on beats')
        # the synchronised matrix is positioned with the onset time stamps
        librosa.display.specshow(Msync, bins_per_octave=BINS_PER_OCTAVE, y_axis='cqt_hz', x_axis='time', x_coords=onset_times)
        plt.tight_layout()
    return onsets, onset_times, Msync
def extract_onsets(y, sr, manual_opt):
method = params.onset
#compute the CQT transform C: np.array((252, Tmax*sr/STEP))
C = librosa.amplitude_to_db(librosa.core.magphase(librosa.cqt(y=y, sr=sr, bins_per_octave=BINS_PER_OCTAVE, n_bins=N_OCTAVES * BINS_PER_OCTAVE, hop_length = STEP))[0], ref=np.max)
#to reduce dimensionality, we'll onset-synchronous the CQT
......@@ -160,11 +185,13 @@ def extract_onsets(method, y, sr, plot_opt):
#onset_times is a vector of onset times np.array((N+1,)) including 0
#Csync is the CQT transform synchronized on onsets np.array((252, N))
if method == 'no':
onset, onset_times, Csync = no_onsets(y, sr, C, plot_opt)
onset, onset_times, Csync = no_onsets(sr, C)
elif method == 'onset':
onset, onset_times, Csync = detect_onsets(y, sr, C, plot_opt)
onset, onset_times, Csync = detect_onsets(y, sr, C)
elif method == 'beat':
onset, onset_times, Csync = detect_beats(y, sr, C, plot_opt)
onset, onset_times, Csync = detect_beats(y, sr, C)
elif method == 'manual':
onset, onset_times, Csync = get_manual_beats(sr, C, manual_opt)
else:
print('onset parameter is not well-defined')
sys.exit()
......@@ -375,7 +402,41 @@ def plot_structure(Rf, X, seg_ids, k, onset_times):
def plot_features(X, onsets, onset_times):
    """Plot onset-synchronous features and their deltas in two stacked panels.

    Parameters
    ----------
    X : feature matrix, one column per analysis frame. The rows are assumed
        to hold the static coefficients of the family params.feat[0] first,
        followed by their deltas (12 + 12 for 'chroma', 20 + 20 for
        'cepstral') -- TODO confirm when several families are enabled.
    onsets : frame indexes used to aggregate X with the median.
    onset_times : time stamps used as x coordinates of the panels.

    Only params.feat[0] is inspected; for any family other than 'chroma' or
    'cepstral' a message is printed and nothing is drawn.
    """
    Xsync = librosa.util.sync(X, onsets, aggregate=np.median)
    print(X.shape, Xsync.shape)

    # family -> (number of static coefficients, title suffix, extra specshow options)
    layouts = {
        'chroma': (12, 'chroma (12)', {'y_axis': 'chroma'}),
        'cepstral': (20, 'MFCC (20)', {}),
    }
    family = params.feat[0]
    if family not in layouts:
        print('these parameters can not be plot')
        return
    n_static, label, axis_opts = layouts[family]

    plt.figure(figsize=(12, 4))
    plt.subplot(2, 1, 1)
    plt.title('onset-synchronous ' + label)
    # Static coefficients are rows 0 .. n_static-1. The upper bound is
    # n_static itself: one more row would already be the first delta
    # coefficient, which belongs to the lower panel.
    librosa.display.specshow(Xsync[:n_static, :], x_axis='time', x_coords=onset_times, **axis_opts)
    plt.colorbar()
    plt.tight_layout()

    plt.subplot(2, 1, 2)
    plt.title('onset-synchronous delta ' + label)
    librosa.display.specshow(Xsync[n_static:, :], x_axis='time', x_coords=onset_times, **axis_opts)
    plt.colorbar()
    plt.tight_layout()
......@@ -416,7 +477,7 @@ def feature_extraction(y, sr, opt_tuning):
full.append(fcep)
if 'chroma' in params.feat:
chroma = librosa.feature.chroma_cqt(y=y, sr=sr, n_fft = NFFT, hop_length = STEP, norm = None, tunning= A440)
chroma = librosa.feature.chroma_cqt(y=y, sr=sr, n_chroma = 12, n_octaves = N_OCTAVES, hop_length = STEP, norm = None, tuning= A440)
chroma_delta = librosa.feature.delta(chroma)
fchr = np.concatenate((chroma, chroma_delta), axis=0)
full.append(fchr)
......@@ -510,6 +571,7 @@ def main():
parser = argparse.ArgumentParser(description='Segmentation and clustering of musical sections with spectral clustering (Laplacian matrix and eigen values)')
parser.add_argument('filename', type=str, help='name of audio file')
parser.add_argument('manual_onset', nargs='?', type=str, help='name of the file containing manual annotations for onset timestamps (with method=manual)')
args = parser.parse_args()
......@@ -523,9 +585,13 @@ def main():
#extract acoustic feature from audio signal feat is a matrix np.array((nb features, Tmax*sr/STEP))
feat = feature_extraction(y, sr, params.opt_tuning)
#extract onset indexes and times + onset-synchronous CQT transform on onsets.
onsets, onset_times, Csync = extract_onsets(params.onset, y, sr, params.onset_plot)
onsets, onset_times, Csync = extract_onsets(y, sr, args.manual_onset)
if params.plot_features: plot_features(feat, onsets, onset_times)
#================
# Affinity matrix
#================
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment