In [3]:
import os
import pickle
import joblib as jl
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from IPython.display import Image
from IPython.display import YouTubeVideo

def pickleload(fp):
    with open(fp, 'rb') as f:
        return pickle.load(f)
    
files_dir = '/home/shreyan/PROJECTS/midlevel/verena/midlevel/interpretations/files_song_versions/'
emo_names = ['valence', 'energy', 'tension', 'anger', 'fear', 'happy', 'sad', 'tender']

The Adam Feely CNN

In [3]:
Image('files_song_versions/feely.png')
Out[3]:

Yes, I like Adam Neely, and I trained my network to Feel using the Adam optimizer.

(worst pun I made all year)

Just listening to music on YT on a regular day

In [4]:
Image('files_song_versions/hurt_yt_2.png')
Out[4]:

The top comment is touching ...

(and 6000 people agree with it)

In [5]:
Image('files_song_versions/hurt_yt_comment.png')
Out[5]:

The other version is the one by Johnny Cash

In [11]:
YouTubeVideo('4ahHWROn8M0', height=100)
Out[11]:

Nine Inch Nails version

In [12]:
YouTubeVideo('kPz21cDK7dg', height=100)
Out[12]:

Will a mere CNN feel the same way as these 6001 humans? Let's see...

Train CNNs to predict emotions and mid-level features...

In [8]:
Image('files_song_versions/net.png')
Out[8]:

...using the Soundtracks dataset and the Mid-level dataset...

Mid-level: 'melody', 'articulation', 'rhythm_complexity', 'rhythm_stability', 'dissonance', 'tonal_stability', 'minorness'

Soundtracks: 'valence', 'energy', 'tension', 'anger', 'fear', 'happy', 'sad', 'tender'

...and predict the emotion ratings for the two versions

In [80]:
Image('files_song_versions/e_hurt.png')
Out[80]:

Very nice. The CNN has feelings. But can it introspect?

Thanks to an entire life of reflecting on its emotions, it can.

In [81]:
Image('files_song_versions/ml_hurt.png')
Out[81]:
In [31]:
ML2Eweights = pickleload(os.path.join(files_dir, '1295_ml2e_weights_19'))
In [91]:
# ml_versions = pickleload(os.path.join(files_dir, 'ml_hallelujah'))
In [92]:
emotion_annotations = pickleload(os.path.join(files_dir, '1295_st_all_emo_anns'))
emotion_predictions = pickleload(os.path.join(files_dir, '1295_st_all_emo_preds'))
midlevel_annotations = pickleload(os.path.join(files_dir, '1295_st_all_ml_anns'))
midlevel_predictions = pickleload(os.path.join(files_dir, '1295_st_all_ml_preds'))
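
ML2Eweights is used below as the weight matrix of a linear mid-level-to-emotion mapping, so each emotion prediction decomposes into additive per-feature "effects" (mid-level prediction × weight). A minimal sketch of that decomposition, assuming ML2Eweights has shape (8 emotions × 7 mid-level features), midlevel_predictions has one row per song, and ignoring any bias term of the layer:

In [ ]:
midlevel_names = ['melody', 'articulation', 'rhythm_complexity', 'rhythm_stability',
                  'dissonance', 'tonal_stability', 'minorness']

def emotion_effects(ml_preds, weights, emotion_idx):
    """Per-feature contributions of mid-level predictions to one emotion.

    ml_preds: array-like of shape (n_songs, 7) with mid-level predictions.
    weights:  array-like of shape (8, 7), one row of weights per emotion.
    Returns an array of shape (n_songs, 7); summing over the feature axis
    (plus the layer's bias, if any) recovers the emotion prediction.
    """
    w = np.asarray(weights)[emotion_idx]   # (7,) weights for this emotion
    return np.asarray(ml_preds) * w        # broadcasted per-feature effects

# Example: effects on 'anger' (index 3 in emo_names) for every song in the dataset
anger_effects = emotion_effects(midlevel_predictions, ML2Eweights, emo_names.index('anger'))
pd.DataFrame(anger_effects, columns=midlevel_names).describe()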

What musical characteristics make the Nine Inch Nails version sound "angrier"?

Tonal and rhythmic (in)stability, dissonance, and (un)melodiousness

In [105]:
ml_versions = pickleload(os.path.join(files_dir, 'ml_hurt'))
import matplotlib.patches as mpatches
blue_patch = mpatches.Patch(color='blue', label=ml_versions.index[0])
orange_patch = mpatches.Patch(color='orange', label=ml_versions.index[1])
fig, ax = plt.subplots(4,2,sharey=True,figsize=(25,30))
emotion_num = 0
vert_spacing=3.6
ml_names_plot = ['melody', 'artic.', 'rh.complx', 'rh.stblty', 'diss.', 'tonal', 'minor']
font = {'size'   : 15}
matplotlib.rc('font', **font)
for i in range(ax.shape[0]):
    for j in range(ax.shape[1]):
        # Calculate effect of midlevel features for current emotion across all songs
        effect = np.multiply(midlevel_predictions, ML2Eweights.transpose()[:,emotion_num])
        
        # Get mid-level predictions for the two versions
        song1_ml = ml_versions.iloc[0]
        song2_ml = ml_versions.iloc[1]
        # Calculate the effect of each version's mid-level predictions on the current emotion
        song1_ml_effect = np.multiply(song1_ml, ML2Eweights.transpose()[:,emotion_num])
        song2_ml_effect = np.multiply(song2_ml, ML2Eweights.transpose()[:,emotion_num])
        # Boxplots: distribution of effects over the whole dataset, as a reference
        ax[i][j].boxplot(effect.transpose(), vert=False, positions=np.linspace(1,vert_spacing,7), showfliers=False)
        # Overlay the effects for the first version (blue) and second version (orange)
        ax[i][j].scatter(song1_ml_effect, np.linspace(1,vert_spacing,7), color='b', s=95, alpha=0.9)
        ax[i][j].scatter(song2_ml_effect, np.linspace(1,vert_spacing,7), color='orange', s=95, alpha=1)
        ax[i][j].set_yticklabels(ml_names_plot)
        ax[i][j].tick_params(axis='y', direction='in')
#         ax[i][j].set_title(emo_names[emotion_num])
        ax[i][j].text(.9,.93,emo_names[emotion_num],horizontalalignment='center', transform=ax[i][j].transAxes)
        ax[i][j].axvline(0, alpha=0.5, linestyle='--')
        ax[i][j].yaxis.grid(True)
        emotion_num += 1
plt.legend(handles=[blue_patch, orange_patch], loc='upper center', bbox_to_anchor=(0.5, -0.1))
fig.subplots_adjust(wspace=0)
# plt.savefig('effects.pdf', dpi=1200, bbox_inches="tight", pad_inches=0)
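
The same plotting cell is repeated for the other songs below; a small helper like the following (a sketch only, assuming the same global variables `midlevel_predictions`, `ML2Eweights`, and `emo_names`, and a hypothetical `plot_midlevel_effects` name) could replace the copy-pasting:

In [ ]:
import matplotlib.patches as mpatches

def plot_midlevel_effects(ml_versions, colors=('b', 'orange', 'g', 'r')):
    """Boxplots of mid-level effects across the dataset, with one scatter overlay per version."""
    ml_names_plot = ['melody', 'artic.', 'rh.complx', 'rh.stblty', 'diss.', 'tonal', 'minor']
    positions = np.linspace(1, 3.6, 7)
    fig, ax = plt.subplots(4, 2, sharey=True, figsize=(25, 30))
    for emotion_num, a in enumerate(ax.flat):
        # Reference distribution: effects of mid-level predictions over the whole dataset
        effect = np.multiply(midlevel_predictions, ML2Eweights.transpose()[:, emotion_num])
        a.boxplot(effect.transpose(), vert=False, positions=positions, showfliers=False)
        # One scatter overlay per version of the song
        for k in range(len(ml_versions)):
            version_effect = np.multiply(ml_versions.iloc[k], ML2Eweights.transpose()[:, emotion_num])
            a.scatter(version_effect, positions, color=colors[k], s=95, alpha=0.9)
        a.set_yticklabels(ml_names_plot)
        a.tick_params(axis='y', direction='in')
        a.text(.9, .93, emo_names[emotion_num], horizontalalignment='center', transform=a.transAxes)
        a.axvline(0, alpha=0.5, linestyle='--')
        a.yaxis.grid(True)
    handles = [mpatches.Patch(color=colors[k], label=ml_versions.index[k]) for k in range(len(ml_versions))]
    plt.legend(handles=handles, loc='upper center', bbox_to_anchor=(0.5, -0.1))
    fig.subplots_adjust(wspace=0)

# e.g. plot_midlevel_effects(pickleload(os.path.join(files_dir, 'ml_hallelujah')))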

Other songs

1. Hallelujah

In [14]:
YouTubeVideo('ttEMYvpoR-k', height=100)
Out[14]:
In [15]:
YouTubeVideo('y8AWFf7EAc4', height=100)
Out[15]:
In [88]:
Image('files_song_versions/e_hallelujah.png')
Out[88]:
In [10]:
Image('files_song_versions/ml_hallelujah.png')
Out[10]:
In [106]:
ml_versions = pickleload(os.path.join(files_dir, 'ml_hallelujah'))
import matplotlib.patches as mpatches
blue_patch = mpatches.Patch(color='blue', label=ml_versions.index[0])
orange_patch = mpatches.Patch(color='orange', label=ml_versions.index[1])
fig, ax = plt.subplots(4,2,sharey=True,figsize=(25,30))
emotion_num = 0
vert_spacing=3.6
ml_names_plot = ['melody', 'artic.', 'rh.complx', 'rh.stblty', 'diss.', 'tonal', 'minor']
font = {'size'   : 15}
matplotlib.rc('font', **font)
for i in range(ax.shape[0]):
    for j in range(ax.shape[1]):
        # Calculate effect of midlevel features for current emotion across all songs
        effect = np.multiply(midlevel_predictions, ML2Eweights.transpose()[:,emotion_num])
        
        # Get mid-level predictions for the two versions
        song1_ml = ml_versions.iloc[0]
        song2_ml = ml_versions.iloc[1]
        # Calculate the effect of each version's mid-level predictions on the current emotion
        song1_ml_effect = np.multiply(song1_ml, ML2Eweights.transpose()[:,emotion_num])
        song2_ml_effect = np.multiply(song2_ml, ML2Eweights.transpose()[:,emotion_num])
        # Boxplots: distribution of effects over the whole dataset, as a reference
        ax[i][j].boxplot(effect.transpose(), vert=False, positions=np.linspace(1,vert_spacing,7), showfliers=False)
        # Overlay the effects for the first version (blue) and second version (orange)
        ax[i][j].scatter(song1_ml_effect, np.linspace(1,vert_spacing,7), color='b', s=95, alpha=0.9)
        ax[i][j].scatter(song2_ml_effect, np.linspace(1,vert_spacing,7), color='orange', s=95, alpha=1)
        ax[i][j].set_yticklabels(ml_names_plot)
        ax[i][j].tick_params(axis='y', direction='in')
#         ax[i][j].set_title(emo_names[emotion_num])
        ax[i][j].text(.9,.93,emo_names[emotion_num],horizontalalignment='center', transform=ax[i][j].transAxes)
        ax[i][j].axvline(0, alpha=0.5, linestyle='--')
        ax[i][j].yaxis.grid(True)
        emotion_num += 1
plt.legend(handles=[blue_patch, orange_patch], loc='upper center', bbox_to_anchor=(0.5, -0.1))
fig.subplots_adjust(wspace=0)

2. The Girl from Ipanema

The model is not yet sensitive to subtle differences between song versions (or maybe that's a good thing?)

In [16]:
YouTubeVideo('vIGHaRE4dhk', height=100)
Out[16]:
In [17]:
YouTubeVideo('j8VPmtyLqSY', height=100)
Out[17]:
In [18]:
YouTubeVideo('aFlEOn20UiA', height=100)
Out[18]:
In [13]:
Image('files_song_versions/e_ipanema.png')
Out[13]:
In [96]:
Image('files_song_versions/ml_ipanema.png')
Out[96]:
In [108]:
ml_versions = pickleload(os.path.join(files_dir, 'ml_ipanema'))
import matplotlib.patches as mpatches
red_patch = mpatches.Patch(color='red', label=ml_versions.index[0])
blue_patch = mpatches.Patch(color='blue', label=ml_versions.index[1])
green_patch = mpatches.Patch(color='green', label=ml_versions.index[2])
fig, ax = plt.subplots(4,2,sharey=True,figsize=(25,30))
emotion_num = 0
vert_spacing=3.6
ml_names_plot = ['melody', 'artic.', 'rh.complx', 'rh.stblty', 'diss.', 'tonal', 'minor']
font = {'size'   : 15}
matplotlib.rc('font', **font)
for i in range(ax.shape[0]):
    for j in range(ax.shape[1]):
        # Calculate effect of midlevel features for current emotion across all songs
        effect = np.multiply(midlevel_predictions, ML2Eweights.transpose()[:,emotion_num])
        
        # Get mid-level predictions for the three versions
        song1_ml = ml_versions.iloc[0]
        song2_ml = ml_versions.iloc[1]
        song3_ml = ml_versions.iloc[2]
        # Calculate the effect of each version's mid-level predictions on the current emotion
        song1_ml_effect = np.multiply(song1_ml, ML2Eweights.transpose()[:,emotion_num])
        song2_ml_effect = np.multiply(song2_ml, ML2Eweights.transpose()[:,emotion_num])
        song3_ml_effect = np.multiply(song3_ml, ML2Eweights.transpose()[:,emotion_num])
        # Boxplots: distribution of effects over the whole dataset, as a reference
        ax[i][j].boxplot(effect.transpose(), vert=False, positions=np.linspace(1,vert_spacing,7), showfliers=False)
        # Overlay the effects for the three versions (red, blue, green)
        ax[i][j].scatter(song1_ml_effect, np.linspace(1,vert_spacing,7), color='r', s=95, alpha=0.7)
        ax[i][j].scatter(song2_ml_effect, np.linspace(1,vert_spacing,7), color='b', s=95, alpha=1)
        ax[i][j].scatter(song3_ml_effect, np.linspace(1,vert_spacing,7), color='g', s=95, alpha=0.7)
        ax[i][j].set_yticklabels(ml_names_plot)
        ax[i][j].tick_params(axis='y', direction='in')
#         ax[i][j].set_title(emo_names[emotion_num])
        ax[i][j].text(.9,.93,emo_names[emotion_num],horizontalalignment='center', transform=ax[i][j].transAxes)
        ax[i][j].axvline(0, alpha=0.5, linestyle='--')
        ax[i][j].yaxis.grid(True)
        emotion_num += 1
plt.legend(handles=[red_patch, blue_patch, green_patch], loc='upper center', bbox_to_anchor=(0.5, -0.1))
fig.subplots_adjust(wspace=0)