Commit d645b63d authored Aug 18, 2022 by Nilesh Jagdish

Added project files

parent fcfae6f0

Showing 5 changed files with 880 additions and 0 deletions
CS626 FinalProjectPresentation.pdf   +0 −0
combined_classification.py           +336 −0
feature_extraction.py                +272 −0
feature_extraction_for_test_file.py  +260 −0
readme.txt                           +12 −0
CS626 FinalProjectPresentation.pdf  0 → 100644
File added
combined_classification.py  0 → 100644
# -*- coding: utf-8 -*-
"""Combined_classification.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/18oDzOkExok8oXNpf4EuhMMqFtw4-uQdN
"""
import pandas as pd
import numpy as np
import pickle
import itertools
import xgboost as xgb
from sklearn.svm import LinearSVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectFromModel
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer, TfidfVectorizer
from sklearn.metrics import confusion_matrix, f1_score, accuracy_score, precision_score, recall_score
import matplotlib
from sklearn.manifold import TSNE
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.utils.class_weight import compute_class_weight
from IPython.display import display
import matplotlib.pyplot as plt
import seaborn as sns
# Mount Google Drive first so the CSVs below are readable (in the original notebook
# this cell appeared after the read_csv calls).
from google.colab import drive
drive.mount('/content/drive')

x_train_text = pd.read_csv('/content/drive/MyDrive/CS626/iemocap/text_train.csv')
x_test_text = pd.read_csv('/content/drive/MyDrive/CS626/iemocap/text_test.csv')
y_train_text = x_train_text['label']
y_test_text = x_test_text['label']

x_train_audio = pd.read_csv('/content/drive/MyDrive/CS626/iemocap/audio_train.csv')
x_test_audio = pd.read_csv('/content/drive/MyDrive/CS626/iemocap/audio_test.csv')
y_train_audio = x_train_audio['label']
y_test_audio = x_test_audio['label']

y_train = y_train_audio  # since y_train_audio == y_train_text
y_test = y_test_audio  # since y_test_audio == y_test_text

print(x_train_text.shape, y_train_text.shape, x_train_audio.shape, y_train_audio.shape)
emotion_dict = {'ang': 0, 'hap': 1, 'sad': 2, 'fea': 3, 'sur': 4, 'neu': 5}
emo_keys = list(['ang', 'hap', 'sad', 'fea', 'sur', 'neu'])
id_to_emotion = {0: 'ang', 1: 'hap', 2: 'sad', 3: 'fea', 4: 'sur', 5: 'neu'}
def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    # plt.figure(figsize=(8,8))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')
    print(cm)

    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
def one_hot_encoder(true_labels, num_records, num_classes):
    temp = np.array(true_labels[:num_records])
    true_labels = np.zeros((num_records, num_classes))
    true_labels[np.arange(num_records), temp] = 1
    return true_labels
def display_results(y_test, pred_probs, cm=True):
    pred = np.argmax(pred_probs, axis=-1)
    one_hot_true = one_hot_encoder(y_test, len(pred), len(emotion_dict))
    print('Test Set Accuracy =  {0:.3f}'.format(accuracy_score(y_test, pred)))
    print('Test Set F-score =  {0:.3f}'.format(f1_score(y_test, pred, average='macro')))
    print('Test Set Precision =  {0:.3f}'.format(precision_score(y_test, pred, average='macro')))
    print('Test Set Recall =  {0:.3f}'.format(recall_score(y_test, pred, average='macro')))
    if cm:
        plot_confusion_matrix(confusion_matrix(y_test, pred), classes=emo_keys)
cl_weight = dict(pd.Series(x_train_audio['label']).value_counts(normalize=True))  # label frequencies (not used further in this script)

"""## Get Text Features"""
# Option 1: TF-IDF features over the transcriptions (alternative to the BERT
# sentence embeddings computed in the next cell).
tfidf = TfidfVectorizer(sublinear_tf=True, min_df=5, norm='l2', encoding='latin-1',
                        ngram_range=(1, 2), stop_words='english')
features_text = tfidf.fit_transform(x_train_text.append(x_test_text).transcription).toarray()
x_train_text = features_text[:x_train_text.shape[0]]
x_test_text = features_text[-x_test_text.shape[0]:]
print(features_text.shape, x_train_text.shape, x_test_text.shape)
# Option 2: BERT sentence embeddings (expects x_train_text / x_test_text to still be
# the original DataFrames, so run this instead of, not after, the TF-IDF cell).
!pip install sentence_transformers
from sentence_transformers import SentenceTransformer

bert = SentenceTransformer('bert-base-uncased')
print(x_train_text)

# features = x_train_text[:10].transcription.apply(bert.encode)
features_text = x_train_text.append(x_test_text).transcription.apply(bert.encode)
x_train_text = features_text[:x_train_text.shape[0]]
x_test_text = features_text[-x_test_text.shape[0]:]
print(features_text.shape, x_train_text.shape, x_test_text.shape)
print(x_train_text)

x_train_text = np.array(x_train_text.values.tolist())
x_test_text = np.array(x_test_text.values.tolist())
print(features_text.shape, x_train_text.shape, x_test_text.shape)
"""## Combine Text + Audio Features"""
combined_x_train = np.concatenate((np.array(x_train_audio[x_train_audio.columns[2:]]), x_train_text), axis=1)
combined_x_test = np.concatenate((np.array(x_test_audio[x_test_audio.columns[2:]]), x_test_text), axis=1)
print(combined_x_train.shape, combined_x_test.shape)
combined_features_dict = {}
combined_features_dict['x_train'] = combined_x_train
combined_features_dict['x_test'] = combined_x_test
combined_features_dict['y_train'] = np.array(y_train)
combined_features_dict['y_test'] = np.array(y_test)

with open('/content/drive/MyDrive/CS626/iemocap/combined_features_bert.pkl', 'wb') as f:
    pickle.dump(combined_features_dict, f)
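# The saved dict can be reloaded later to skip recomputing the BERT encodings — a
# usage sketch (not part of the original notebook):
# with open('/content/drive/MyDrive/CS626/iemocap/combined_features_bert.pkl', 'rb') as f:
#     d = pickle.load(f)
# combined_x_train, y_train = d['x_train'], d['y_train']
# combined_x_test, y_test = d['x_test'], d['y_test']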
rf_classifier = RandomForestClassifier(n_estimators=600, min_samples_split=25)
rf_classifier.fit(combined_x_train, y_train)

# Predict
pred_probs = rf_classifier.predict_proba(combined_x_test)

# Results
display_results(y_test, pred_probs)

with open('/content/drive/MyDrive/CS626/iemocap/combined_rf_classifier_bert.pkl', 'wb') as f:
    pickle.dump(pred_probs, f)

with open('/content/drive/MyDrive/CS626/iemocap/RF_bert.pkl', 'wb') as f:
    pickle.dump(rf_classifier, f)
xgb_classifier = xgb.XGBClassifier(max_depth=7, learning_rate=0.008, objective='multi:softprob',
                                   n_estimators=600, subsample=0.8, num_class=len(emotion_dict),
                                   booster='gbtree', n_jobs=4)
xgb_classifier.fit(combined_x_train, y_train)

# Predict
pred_probs = xgb_classifier.predict_proba(combined_x_test)

# Results
display_results(y_test, pred_probs)

with open('/content/drive/MyDrive/CS626/iemocap/combined_xgb_classifier_bert.pkl', 'wb') as f:
    pickle.dump(pred_probs, f)

with open('/content/drive/MyDrive/CS626/iemocap/XGB_bert.pkl', 'wb') as f:
    pickle.dump(xgb_classifier, f)
svc_classifier = LinearSVC()
svc_classifier.fit(combined_x_train, y_train)

# Predict (LinearSVC has no predict_proba, so metrics are computed from hard predictions)
pred = svc_classifier.predict(combined_x_test)

# Results
one_hot_true = one_hot_encoder(y_test, len(pred), len(emotion_dict))
print('Test Set Accuracy =  {0:.3f}'.format(accuracy_score(y_test, pred)))
print('Test Set F-score =  {0:.3f}'.format(f1_score(y_test, pred, average='macro')))
print('Test Set Precision =  {0:.3f}'.format(precision_score(y_test, pred, average='macro')))
print('Test Set Recall =  {0:.3f}'.format(recall_score(y_test, pred, average='macro')))
plot_confusion_matrix(confusion_matrix(y_test, pred), classes=emo_keys)

with open('/content/drive/MyDrive/CS626/iemocap/combined_svc_classifier_model_bert.pkl', 'wb') as f:
    pickle.dump(pred, f)

with open('/content/drive/MyDrive/CS626/iemocap/SVC_bert.pkl', 'wb') as f:
    pickle.dump(svc_classifier, f)
# Note: MultinomialNB requires non-negative inputs, so this cell only works with the
# TF-IDF features, not the (signed) BERT embeddings.
mnb_classifier = MultinomialNB()
mnb_classifier.fit(combined_x_train, y_train)

# Predict
pred_probs = mnb_classifier.predict_proba(combined_x_test)

# Results
display_results(y_test, pred_probs)

with open('/content/drive/MyDrive/CS626/iemocap/combined_mnb_classifier_bert.pkl', 'wb') as f:
    pickle.dump(pred_probs, f)

with open('/content/drive/MyDrive/CS626/iemocap/MNB_bert.pkl', 'wb') as f:
    pickle.dump(mnb_classifier, f)
mlp_classifier = MLPClassifier(hidden_layer_sizes=(1000,), activation='relu', solver='adam',
                               alpha=0.0001, batch_size='auto', learning_rate='adaptive',
                               learning_rate_init=0.01, power_t=0.5, max_iter=1000,
                               shuffle=True, random_state=None, tol=0.0001, verbose=False,
                               warm_start=True, momentum=0.8, nesterovs_momentum=True,
                               early_stopping=False, validation_fraction=0.1,
                               beta_1=0.9, beta_2=0.999, epsilon=1e-08)
mlp_classifier.fit(combined_x_train, y_train)

# Predict
pred_probs = mlp_classifier.predict_proba(combined_x_test)

# Results
display_results(y_test, pred_probs)

with open('/content/drive/MyDrive/CS626/iemocap/combined_mlp_classifier_bert.pkl', 'wb') as f:
    pickle.dump(pred_probs, f)

with open('/content/drive/MyDrive/CS626/iemocap/MLP_bert.pkl', 'wb') as f:
    pickle.dump(mlp_classifier, f)
lr_classifier = LogisticRegression(solver='lbfgs', multi_class='multinomial', max_iter=1000)
lr_classifier.fit(combined_x_train, y_train)

# Predict
pred_probs = lr_classifier.predict_proba(combined_x_test)

# Results
display_results(y_test, pred_probs)

with open('/content/drive/MyDrive/CS626/iemocap/combined_lr_classifier_bert.pkl', 'wb') as f:
    pickle.dump(pred_probs, f)

with open('/content/drive/MyDrive/CS626/iemocap/LR_bert.pkl', 'wb') as f:
    pickle.dump(lr_classifier, f)
ax = xgb.plot_importance(xgb_classifier, max_num_features=10, height=0.5, show_values=False)
fig = ax.figure
fig.set_size_inches(8, 8)

contribution_scores = xgb_classifier.feature_importances_
print(contribution_scores)
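# contribution_scores is indexed by column position in combined_x_train. A sketch of
# mapping indices back to names, assuming the audio feature columns come first,
# followed by the 768 BERT dimensions (sketch only, not in the original notebook):
# feature_names = list(x_train_audio.columns[2:]) + ['bert_{}'.format(i) for i in range(768)]
# top10 = sorted(zip(feature_names, contribution_scores), key=lambda t: -t[1])[:10]
# print(top10)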
# Load predicted probabilities
with open('/content/drive/MyDrive/CS626/iemocap/combined_rf_classifier_bert.pkl', 'rb') as f:
    rf_pred_probs = pickle.load(f)
with open('/content/drive/MyDrive/CS626/iemocap/combined_xgb_classifier_bert.pkl', 'rb') as f:
    xgb_pred_probs = pickle.load(f)
# with open('/content/drive/MyDrive/CS626/iemocap/combined_svc_classifier_model_bert.pkl', 'rb') as f:
#     svc_preds = pickle.load(f)
#     svc_preds = svc_preds.reshape(-1, 6)
# with open('/content/drive/MyDrive/CS626/iemocap/combined_mnb_classifier_bert.pkl', 'rb') as f:
#     mnb_pred_probs = pickle.load(f)
with open('/content/drive/MyDrive/CS626/iemocap/combined_mlp_classifier_bert.pkl', 'rb') as f:
    mlp_pred_probs = pickle.load(f)
with open('/content/drive/MyDrive/CS626/iemocap/combined_lr_classifier_bert.pkl', 'rb') as f:
    lr_pred_probs = pickle.load(f)
# with open('/content/drive/MyDrive/CS626/iemocap/combined_lstm_classifier.pkl', 'rb') as f:
#     lstm_pred_probs = pickle.load(f)

# Sum of the predicted probabilities (proportional to the average, so the argmax is unchanged)
ensemble_pred_probs = (xgb_pred_probs +
                       mlp_pred_probs +
                       rf_pred_probs +
                       # mnb_pred_probs +
                       lr_pred_probs)

# Show metrics
display_results(y_test, ensemble_pred_probs)
# Experimental: fine-tune a BERT classifier with the HF Trainer. Note this cell is
# incomplete as committed — Trainer expects tokenized `datasets.Dataset` objects,
# not raw feature matrices.
from transformers import AutoModelForSequenceClassification
model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased", num_labels=2)

from transformers import TrainingArguments
training_args = TrainingArguments("test_trainer")

from transformers import Trainer
trainer = Trainer(model=model, args=training_args, train_dataset=combined_x_train, eval_dataset=combined_x_test)

new_xtrain = np.concatenate((combined_x_train, np.array(y_train).reshape(-1, 1)), axis=1)

trainer.train()

import numpy as np
from datasets import load_metric

metric = load_metric("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)
\ No newline at end of file
feature_extraction.py  0 → 100644
# -*- coding: utf-8 -*-
"""Feature_Extraction.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/18_6nXwUaTJTNZRgLkqr1w4KVrjHahufb
# Extract labels from the evaluation files
"""
import re
import os

info_line = re.compile(r'\[.+\]\n', re.IGNORECASE)

start_times, end_times, wav_file_names, emotions, vals, acts, doms = [], [], [], [], [], [], []

for sess in range(1, 6):
    emo_evaluation_dir = '/content/drive/MyDrive/CS626/iemocap/IEMOCAP_full_release/Session{}/dialog/EmoEvaluation/'.format(sess)
    evaluation_files = [l for l in os.listdir(emo_evaluation_dir) if 'Ses' in l]
    for file in evaluation_files:
        with open(emo_evaluation_dir + file) as f:
            content = f.read()
        info_lines = re.findall(info_line, content)
        for line in info_lines[1:]:  # the first line is a header
            start_end_time, wav_file_name, emotion, val_act_dom = line.strip().split('\t')
            start_time, end_time = start_end_time[1:-1].split('-')
            val, act, dom = val_act_dom[1:-1].split(',')
            val, act, dom = float(val), float(act), float(dom)
            start_time, end_time = float(start_time), float(end_time)
            start_times.append(start_time)
            end_times.append(end_time)
            wav_file_names.append(wav_file_name)
            emotions.append(emotion)
            vals.append(val)
            acts.append(act)
            doms.append(dom)
import pandas as pd

df_iemocap = pd.DataFrame(columns=['start_time', 'end_time', 'wav_file', 'emotion', 'val', 'act', 'dom'])
df_iemocap['start_time'] = start_times
df_iemocap['end_time'] = end_times
df_iemocap['wav_file'] = wav_file_names
df_iemocap['emotion'] = emotions
df_iemocap['val'] = vals
df_iemocap['act'] = acts
df_iemocap['dom'] = doms

df_iemocap.tail()
df_iemocap.to_csv('/content/drive/MyDrive/CS626/iemocap/df_iemocap.csv', index=False)
"""## Build Audio Vectors"""
# Commented out IPython magic to ensure Python compatibility.
import librosa
import os
import math
import soundfile as sf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.style as ms
from tqdm import tqdm
import pickle
import IPython.display
import librosa.display
ms.use('seaborn-muted')
# %matplotlib inline
labels_df = pd.read_csv('/content/drive/MyDrive/CS626/iemocap/df_iemocap.csv')
iemocap_dir = '/content/drive/MyDrive/CS626/iemocap/IEMOCAP_full_release/'

sr = 44100
audio_vectors = {}
for sess in range(1, 6):  # replace range(1, 6) with e.g. [5] to use a single session if memory is tight
    wav_file_path = '{}Session{}/dialog/wav/'.format(iemocap_dir, sess)
    orig_wav_files = os.listdir(wav_file_path)
    for orig_wav_file in tqdm(orig_wav_files):
        try:
            orig_wav_vector, _sr = librosa.load(wav_file_path + orig_wav_file, sr=sr)
            orig_wav_file, file_format = orig_wav_file.split('.')
            for index, row in labels_df[labels_df['wav_file'].str.contains(orig_wav_file)].iterrows():
                start_time, end_time, truncated_wav_file_name, emotion, val, act, dom = row['start_time'], row['end_time'], row['wav_file'], row['emotion'], row['val'], row['act'], row['dom']
                start_frame = math.floor(start_time * sr)
                end_frame = math.floor(end_time * sr)
                truncated_wav_vector = orig_wav_vector[start_frame:end_frame + 1]
                audio_vectors[truncated_wav_file_name] = truncated_wav_vector
        except:
            print('An exception occurred for {}'.format(orig_wav_file))
    with open('/content/drive/MyDrive/CS626/iemocap/audio_vectors_{}.pkl'.format(sess), 'wb') as f:
        pickle.dump(audio_vectors, f)
"""## Extract Audio Features"""
# Commented out IPython magic to ensure Python compatibility.
import os
import pickle
import soundfile as sf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.style as ms
from tqdm import tqdm
import librosa
import math
import random
import pandas as pd
import IPython.display
import librosa.display
ms.use('
seaborn
-
muted
')
#
%
matplotlib inline
columns = ['wav_file', 'label', 'sig_mean', 'sig_std', 'rmse_mean', 'rmse_std', 'silence', 'harmonic', 'auto_corr_max', 'auto_corr_std']
df_features = pd.DataFrame(columns=columns)

emotion_dict = {'ang': 0,
                'hap': 1,
                'exc': 2,
                'sad': 3,
                'fru': 4,
                'fea': 5,
                'sur': 6,
                'neu': 7,
                'xxx': 8,
                'oth': 8}
data_dir = '/content/drive/MyDrive/CS626/iemocap/'
labels_path = '{}df_iemocap.csv'.format(data_dir)
audio_vectors_path = '{}audio_vectors_'.format(data_dir)

labels_df = pd.read_csv(labels_path)

for sess in range(1, 6):
    audio_vectors = pickle.load(open('{}{}.pkl'.format(audio_vectors_path, sess), 'rb'))
    for index, row in tqdm(labels_df[labels_df['wav_file'].str.contains('Ses0{}'.format(sess))].iterrows()):
        try:
            wav_file_name = row['wav_file']
            label = emotion_dict[row['emotion']]
            y = audio_vectors[wav_file_name]

            feature_list = [wav_file_name, label]  # wav_file, label
            sig_mean = np.mean(abs(y))
            feature_list.append(sig_mean)  # sig_mean
            feature_list.append(np.std(y))  # sig_std

            rmse = librosa.feature.rms(y + 0.0001)[0]
            feature_list.append(np.mean(rmse))  # rmse_mean
            feature_list.append(np.std(rmse))  # rmse_std

            silence = 0
            for e in rmse:
                if e <= 0.4 * np.mean(rmse):
                    silence += 1
            silence /= float(len(rmse))
            feature_list.append(silence)  # silence

            y_harmonic = librosa.effects.hpss(y)[0]
            feature_list.append(np.mean(y_harmonic) * 1000)  # harmonic (scaled by 1000)

            # based on the pitch detection algorithm mentioned here:
            # http://access.feld.cvut.cz/view.php?cisloclanku=2009060001
            cl = 0.45 * sig_mean
            center_clipped = []
            for s in y:
                if s >= cl:
                    center_clipped.append(s - cl)
                elif s <= -cl:
                    center_clipped.append(s + cl)
                elif np.abs(s) < cl:
                    center_clipped.append(0)
            auto_corrs = librosa.core.autocorrelate(np.array(center_clipped))
            feature_list.append(1000 * np.max(auto_corrs) / len(auto_corrs))  # auto_corr_max (scaled by 1000)
            feature_list.append(np.std(auto_corrs))  # auto_corr_std

            df_features = df_features.append(pd.DataFrame(feature_list, index=columns).transpose(), ignore_index=True)
        except:
            print('Some exception occurred')

df_features.to_csv('/content/drive/MyDrive/CS626/iemocap/audio_features_4.csv', index=False)  # note: the "Prepare Data" section below reads audio_features.csv
"""## Prepare Data"""
# Commented out IPython magic to ensure Python compatibility.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from IPython.display import display
# %matplotlib inline

df = pd.read_csv('/content/drive/MyDrive/CS626/iemocap/audio_features.csv')
df = df[df['label'].isin([0, 1, 2, 3, 4, 5, 6, 7])]
print(df.shape)
display(df.head())

# merge the 8 raw labels into 6 classes (exc merged into hap, fru merged into sad)
df['label'] = df['label'].map({0: 0, 1: 1, 2: 1, 3: 2, 4: 2, 5: 3, 6: 4, 7: 5})
df.head()

df.to_csv('/content/drive/MyDrive/CS626/iemocap/no_sample_df.csv')

# oversample fear
fear_df = df[df['label'] == 3]
for i in range(30):
    df = df.append(fear_df)

sur_df = df[df['label'] == 4]
for i in range(10):
    df = df.append(sur_df)

df.to_csv('/content/drive/MyDrive/CS626/iemocap/modified_df.csv')

emotion_dict = {'ang': 0,
                'hap': 1,
                'sad': 2,
                'neu': 3}

scalar = MinMaxScaler()
df[df.columns[2:]] = scalar.fit_transform(df[df.columns[2:]])
df.head()

x_train, x_test = train_test_split(df, test_size=0.20)
x_train.to_csv('/content/drive/MyDrive/CS626/iemocap/audio_train.csv', index=False)
x_test.to_csv('/content/drive/MyDrive/CS626/iemocap/audio_test.csv', index=False)
print(x_train.shape, x_test.shape)
"""### Define preprocessing functions for text"""
import re
import os
import pickle

useful_regex = re.compile(r'^(\w+)', re.IGNORECASE)

file2transcriptions = {}
for sess in range(1, 6):
    transcripts_path = '/content/drive/MyDrive/CS626/iemocap/IEMOCAP_full_release/Session{}/dialog/transcriptions/'.format(sess)
    transcript_files = os.listdir(transcripts_path)
    for f in transcript_files:
        with open('{}{}'.format(transcripts_path, f), 'r') as f:
            all_lines = f.readlines()
        for l in all_lines:
            audio_code = useful_regex.match(l).group()
            transcription = l.split(':')[-1].strip()
            # assuming that all the keys would be unique and hence no `try`
            file2transcriptions[audio_code] = transcription

# save dict
with open('/content/drive/MyDrive/CS626/iemocap/audiocode2text.pkl', 'wb') as file:
    pickle.dump(file2transcriptions, file)
len(file2transcriptions)

audiocode2text = pickle.load(open('/content/drive/MyDrive/CS626/iemocap/audiocode2text.pkl', 'rb'))
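# normalizeString is used below but is not defined anywhere in this commit. A minimal
# sketch, assuming it mirrors the common lowercase/trim/punctuation-stripping helper
# from the PyTorch seq2seq tutorial:
def normalizeString(s):
    s = s.lower().strip()
    s = re.sub(r"([.!?])", r" \1", s)      # pad sentence-final punctuation with a space
    s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)  # keep only letters and . ! ?
    return s.strip()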
text_train = pd.DataFrame()
text_train['wav_file'] = x_train['wav_file']
text_train['label'] = x_train['label']
text_train['transcription'] = [normalizeString(audiocode2text[code]) for code in x_train['wav_file']]

text_test = pd.DataFrame()
text_test['wav_file'] = x_test['wav_file']
text_test['label'] = x_test['label']
text_test['transcription'] = [normalizeString(audiocode2text[code]) for code in x_test['wav_file']]

text_train.to_csv('/content/drive/MyDrive/CS626/iemocap/text_train.csv', index=False)
text_test.to_csv('/content/drive/MyDrive/CS626/iemocap/text_test.csv', index=False)
print(text_train.shape, text_test.shape)
\ No newline at end of file
feature_extraction_for_test_file.py  0 → 100644
# -*- coding: utf-8 -*-
"""Feature_Extraction_for_test_file.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1I93vu44ZmJ-bLjyLf4r0jWV6LX8XlIHO
# Get Audio File
"""
!sudo apt-get install -q -y timidity libsndfile1
!pip install pydub numba==0.48 librosa music21
!pip install SpeechRecognition pydub
!pip install sentence_transformers

from google.colab import drive
drive.mount('/content/drive')
# Commented out IPython magic to ensure Python compatibility.
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
import matplotlib.pyplot as plt
import librosa
from librosa import display as librosadisplay
import logging
import math
import statistics
import sys
from IPython.display import Audio, Javascript
from scipy.io import wavfile
from base64 import b64decode
import music21
from pydub import AudioSegment
import speech_recognition as sp_r
import re
import pandas as pd

logger = logging.getLogger()
logger.setLevel(logging.ERROR)

print("tensorflow: %s" % tf.__version__)

info_line = re.compile(r'\[.+\]\n', re.IGNORECASE)

import os
import pickle
import soundfile as sf
import matplotlib.style as ms
from tqdm import tqdm
import random
import IPython.display
import librosa.display
ms.use('seaborn-muted')
# %matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from IPython.display import display
RECORD = """
const sleep = time => new Promise(resolve => setTimeout(resolve, time))
const b2text = blob => new Promise(resolve => {
const reader = new FileReader()
reader.onloadend = e => resolve(e.srcElement.result)
reader.readAsDataURL(blob)
})
var record = time => new Promise(async resolve => {
stream = await navigator.mediaDevices.getUserMedia({ audio: true })
recorder = new MediaRecorder(stream)
chunks = []
recorder.ondataavailable = e => chunks.push(e.data)
recorder.start()
await sleep(time)
recorder.onstop = async ()=>{
blob = new Blob(chunks)
text = await b2text(blob)
resolve(text)
}
recorder.stop()
})
"""
def record(sec=10):
    try:
        from google.colab import output
    except ImportError:
        print('Not possible to import output from google.colab')
        return ''
    else:
        print('Recording')
        display(Javascript(RECORD))
        s = output.eval_js('record(%d)' % (sec * 1000))
        fname = 'recorded_audio.wav'
        print('Saving to', fname)
        b = b64decode(s.split(',')[1])
        with open(fname, 'wb') as f:
            f.write(b)
        return fname
#@title Select how to input your audio { run: "auto" }
INPUT_SOURCE = 'RECORD'  #@param ["https://storage.googleapis.com/download.tensorflow.org/data/c-scale-metronome.wav", "RECORD", "UPLOAD", "./drive/My Drive/YOUR_MUSIC_FILE.wav"] {allow-input: true}

audio_vectors = {}
sr = 44100
print('You selected', INPUT_SOURCE)
def convert_audio_for_model(user_file, output_file='converted_audio_file.wav'):
    audio = AudioSegment.from_file(user_file)
    audio = audio.set_frame_rate(EXPECTED_SAMPLE_RATE).set_channels(1)
    audio.export(output_file, format="wav")
    return output_file
# NOTE: the helper functions called below (get_text_from_audio, get_feature_frame,
# get_features, save_features, apply_scaling, get_text_encoding, get_probs) are
# defined later in this file, reflecting the original notebook's cell order.
if INPUT_SOURCE == 'RECORD':
    uploaded_file_name = record(7)
    EXPECTED_SAMPLE_RATE = 16000
    converted_audio_file = convert_audio_for_model(uploaded_file_name)
    sample_rate, audio_samples = wavfile.read(converted_audio_file)
    orig_wav_vector, _sr = librosa.load(converted_audio_file, sr=sr)
    orig_wav_file = "converted_audio_file"
    audio_vectors[orig_wav_file] = orig_wav_vector
    text = get_text_from_audio()
    df_features, columns = get_feature_frame()
    df_features = get_features(audio_vectors, df_features, columns)
    save_features(audio_vectors)
    df_features = apply_scaling(df_features)
    encoding = get_text_encoding(text)
    print(text)
    probs = get_probs(encoding)
    print(probs)
elif INPUT_SOURCE == 'UPLOAD':
    wav_file_path = '/content/drive/MyDrive/CS626/test/'
    orig_wav_files = os.listdir(wav_file_path)
    for orig_wav_file in tqdm(orig_wav_files):
        # try:
        orig_wav_vector, _sr = librosa.load(wav_file_path + orig_wav_file, sr=sr)
        orig_wav_file, file_format = orig_wav_file.split('.')
        audio_vectors[orig_wav_file] = orig_wav_vector
        df_features, columns = get_feature_frame()
        df_features = get_features(audio_vectors, df_features, columns)
        save_features(audio_vectors)
        df_features = apply_scaling(df_features)
        with open('/content/drive/MyDrive/CS626/test_pkl/sample_text.txt', 'r') as f:
            text_example = f.readline()
        encoding = get_text_encoding(text_example)
        probs = get_probs(encoding)
        print(probs)
        # except:
        #     print('An exception occurred for {}'.format(orig_wav_file))
"""# Get Text file"""
def get_text_from_audio():
    filename = "converted_audio_file.wav"
    r = sp_r.Recognizer()
    with sp_r.AudioFile(filename) as source:
        audio_data = r.record(source)
        text = r.recognize_google(audio_data)
        print(text)
    return text
"""## Build Audio Vectors"""
def save_features(audio_vectors):
    with open('/content/drive/MyDrive/CS626/test_pkl/audio_vector_1.pkl', 'wb') as f:
        pickle.dump(audio_vectors, f)
"""## Extract Audio Features"""
def get_feature_frame():
    columns = ['wav_file', 'sig_mean', 'sig_std', 'rmse_mean', 'rmse_std', 'silence', 'harmonic', 'auto_corr_max', 'auto_corr_std']
    df_features = pd.DataFrame(columns=columns)
    return df_features, columns
def get_features(audio_vectors, df_features, columns):
    for wav_file_name in audio_vectors.keys():
        try:
            y = audio_vectors[wav_file_name]

            feature_list = [wav_file_name]
            sig_mean = np.mean(abs(y))
            feature_list.append(sig_mean)  # sig_mean
            feature_list.append(np.std(y))  # sig_std

            rmse = librosa.feature.rms(y + 0.0001)[0]
            feature_list.append(np.mean(rmse))  # rmse_mean
            feature_list.append(np.std(rmse))  # rmse_std

            silence = 0
            for e in rmse:
                if e <= 0.4 * np.mean(rmse):
                    silence += 1
            silence /= float(len(rmse))
            feature_list.append(silence)  # silence

            y_harmonic = librosa.effects.hpss(y)[0]
            feature_list.append(np.mean(y_harmonic) * 1000)  # harmonic (scaled by 1000)

            cl = 0.45 * sig_mean
            center_clipped = []
            for s in y:
                if s >= cl:
                    center_clipped.append(s - cl)
                elif s <= -cl:
                    center_clipped.append(s + cl)
                elif np.abs(s) < cl:
                    center_clipped.append(0)
            auto_corrs = librosa.core.autocorrelate(np.array(center_clipped))
            feature_list.append(1000 * np.max(auto_corrs) / len(auto_corrs))  # auto_corr_max (scaled by 1000)
            feature_list.append(np.std(auto_corrs))  # auto_corr_std

            df_features = df_features.append(pd.DataFrame(feature_list, index=columns).transpose(), ignore_index=True)
        except:
            print('Some exception occurred')
    df_features.to_csv('/content/drive/MyDrive/CS626/test_pkl/audio_features_1.csv', index=False)
    return df_features
"""## Prepare Data"""
def apply_scaling(df):
    with open('/content/drive/MyDrive/CS626/test_pkl/scalar.pkl', 'rb') as f:
        scalar = pickle.load(f)
    df[df.columns[1:]] = scalar.transform(df[df.columns[1:]])
    return df
"""### Define preprocessing functions for text"""
def get_text_encoding(text_example):
    from sentence_transformers import SentenceTransformer
    bert = SentenceTransformer('bert-base-uncased')
    encoding = bert.encode(text_example)
    x_test_text = encoding
    x_test_text = x_test_text.reshape(-1, 768)
    return x_test_text
def get_probs(x_test_text):
    x_test_audio = pd.read_csv('/content/drive/MyDrive/CS626/test_pkl/audio_features_1.csv')
    combined_x_test = np.concatenate((np.array(x_test_audio[x_test_audio.columns[1:]]), x_test_text), axis=1)

    xgb = pickle.load(open('/content/drive/MyDrive/CS626/iemocap/XGB_bert.pkl', 'rb'))
    probs_xgb = xgb.predict_proba(combined_x_test)
    mlp = pickle.load(open('/content/drive/MyDrive/CS626/iemocap/MLP_bert.pkl', 'rb'))
    probs_mlp = mlp.predict_proba(combined_x_test)
    rf = pickle.load(open('/content/drive/MyDrive/CS626/iemocap/RF_bert.pkl', 'rb'))
    probs_rf = rf.predict_proba(combined_x_test)
    lr = pickle.load(open('/content/drive/MyDrive/CS626/iemocap/LR_bert.pkl', 'rb'))
    probs_lr = lr.predict_proba(combined_x_test)

    probs = probs_xgb + probs_mlp + probs_rf + probs_lr
    # print(probs_xgb + probs_mlp + probs_rf + probs_lr)
    emotion_dict = {0: 'Angry', 1: 'Happy', 2: 'Sad', 3: 'Fear', 4: 'Surprised', 5: 'Neutral'}
    index = np.argmax(probs)
    print("\n\nDetected Emotion : ", emotion_dict[index])
    print("Emotion Dictionary : ", emotion_dict)
    return probs
\ No newline at end of file
readme.txt  0 → 100644
Instructions for running the code :
1. Run feature_extraction file first.
python3 feature_extraction.py
2. Run combined_classification file to train the model.
python3 combined_classification.py
3. Run feature_extraction_for_test_file file to evaluate the model on a single file.
python3 feature_extraction_for_test_file.py
The dataset is hosted on drive.
Link : https://drive.google.com/drive/folders/12ELUg5aEfyd9BLcje6nzH10U5KwAA8tP?usp=sharing
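Note : all three scripts assume the dataset and generated files live under /content/drive/MyDrive/CS626/ (a Google Drive mounted in Colab); update the hardcoded paths before running elsewhere.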