from utils import *
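# NOTE: the wildcard import above is assumed to provide numpy (np), pandas (pd),
# matplotlib.pyplot (plt), IPython.display's display/Audio, lime_image, pickleload,
# prepare_audio, compile_prediction_function_audio, recon_audio, analyse_midlevel_i,
# and the modelfile / ml_names / emo_names / ml_dict / ML2Eweights globals used below.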
path = '/home/shreyan/PROJECTS/midlevel/Soundtracks/set1/set1/mp3/'
mp3_path = 'Soundtrack360_mp3/'
stft_path = 'stft/'
songname = '001.mp3'
song_audio_path = f'{path}{mp3_path}{songname}'
song_spec_path = f'{path}{stft_path}{songname}.spec'
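# Load the song's precomputed STFT and filterbank, cached as pickles alongside the .spec file.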
song_stft = pickleload(f'{path}{stft_path}{songname}.spec.stft')
song_filterbank = pickleload(f'{path}{stft_path}{songname}.spec.filterbank')
display(Audio(song_audio_path, rate=22050))
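# Compile the trained model into a prediction function and run it on the prepared
# spectrogram; it returns mid-level and emotion predictions for the excerpt.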
prediction_fn_audio = compile_prediction_function_audio(modelfile)
spectrum, _, _, _ = prepare_audio(song_spec_path)
ml_preds, emo_preds = prediction_fn_audio(np.array([spectrum]))
emo_preds = pd.DataFrame(emo_preds)
emo_preds.columns = emo_names
print(f"Emotion predictions for song {songname}")
print(emo_preds.T)
ml_preds = pd.DataFrame(ml_preds)
ml_preds.columns = ml_names
print(f"Mid-level predictions for song {songname}")
print(ml_preds.T)
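# Plot, for each of the 8 emotions, how the 7 mid-level predictions contribute to it:
# each bar is the mid-level prediction scaled by its mid-level-to-emotion weight.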
fig, ax = plt.subplots(2,4,sharey=True,figsize=(25,8))
emotion_num = 0
for i in range(ax.shape[0]):
    for j in range(ax.shape[1]):
        song_ml = ml_preds
        song_ml_effect = np.multiply(song_ml, ML2Eweights.transpose()[:, emotion_num])
        ax[i][j].barh(np.arange(7), song_ml_effect.values[0], color='g', alpha=0.6)
        ax[i][j].set_yticks(np.arange(7))
        ax[i][j].set_yticklabels(ml_names)
        ax[i][j].tick_params(axis='y', direction='in')
        ax[i][j].text(.9, .93, emo_names[emotion_num], horizontalalignment='center', transform=ax[i][j].transAxes)
        ax[i][j].axvline(0, alpha=0.5, linestyle='--')
        ax[i][j].yaxis.grid(True)
        ax[i][j].set_xlim(left=-0.5, right=0.5)
        emotion_num += 1
fig.subplots_adjust(wspace=0)
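# Segment the input spectrogram into Felzenszwalb superpixels and visualise the
# excerpt of the song that the model actually sees.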
from skimage.segmentation import felzenszwalb, mark_boundaries
spectrum, spec_orig, start_stop_times, start_stop_frames = prepare_audio(song_spec_path)
start_frame = start_stop_frames[0]
stop_frame = start_stop_frames[1]
song_stft_sliced = song_stft[start_frame:stop_frame,:]
segments = felzenszwalb(spectrum / np.max(np.abs(spectrum)), scale=25, min_size=40)
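# Show the original (unnormalised) spectrogram excerpt, with x-axis ticks mapped
# back to the excerpt's start/stop times.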
plt.imshow(np.rot90(spec_orig))
plt.xticks(np.linspace(0,spec_orig.shape[0], 5).astype(int), np.linspace(start_stop_times[0], start_stop_times[1], 5).round(1))
plt.show()
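# Predict mid-level features for this excerpt (same prediction function as above).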
ml_pred, _ = prediction_fn_audio(np.array([spectrum]))  # second return value (emotion predictions, cf. above) is not needed here
ml_pred = pd.DataFrame(ml_pred)
ml_pred.columns = ml_names
print(ml_pred.T)
list_exp = []
# spectrum = spectrum / np.max(np.abs(spectrum))
print("\n------LIME based analysis-----")
explainer = lime_image.LimeImageExplainer(verbose=True)
explanation, seg = explainer.explain_instance(image=spectrum,
                                              classifier_fn=prediction_fn_audio,
                                              hide_color=0, num_targets=7,
                                              num_samples=50000, for_emotion=False,
                                              segmentation_fn=felzenszwalb,
                                              scale=25, min_size=40)
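# Goodness-of-fit (R2) of LIME's local linear surrogate; `explainer.base.right` and
# `explainer.base.predictions` are assumed to be exposed by this LIME variant.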
from sklearn.metrics import r2_score
print("R2-score for the linear surrogate function", r2_score(explainer.base.right, [i[0] for i in explainer.base.predictions]))
aud_orig = recon_audio(song_stft_sliced, song_filterbank, spec_orig)
analyse_midlevel_i(explanation, 1, spec_orig, song_stft_sliced, song_filterbank, ml_dict, ml_names, prediction_fn_audio)