Commit d645b63d authored by Nilesh Jagdish's avatar Nilesh Jagdish

Added project files

parent fcfae6f0
This diff is collapsed.
This diff is collapsed.
# -*- coding: utf-8 -*-
"""Feature_Extraction_for_test_file.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1I93vu44ZmJ-bLjyLf4r0jWV6LX8XlIHO
# Get Audio File
"""
!sudo apt-get install -q -y timidity libsndfile1
!pip install pydub numba==0.48 librosa music21
!pip install SpeechRecognition pydub
!pip install sentence_transformers
from google.colab import drive
drive.mount('/content/drive')
# Commented out IPython magic to ensure Python compatibility.
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
import matplotlib.pyplot as plt
import librosa
from librosa import display as librosadisplay
import logging
import math
import statistics
import sys
from IPython.display import Audio, Javascript
from scipy.io import wavfile
from base64 import b64decode
import music21
from pydub import AudioSegment
import speech_recognition as sp_r
import re
import pandas as pd
logger = logging.getLogger()
logger.setLevel(logging.ERROR)
print("tensorflow: %s" % tf.__version__)
info_line = re.compile(r'\[.+\]\n', re.IGNORECASE)
import os
import pickle
import soundfile as sf
import matplotlib.pyplot as plt
import matplotlib.style as ms
from tqdm import tqdm
import math
import random
import IPython.display
import librosa.display
ms.use('seaborn-muted')
# %matplotlib inline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from IPython.display import display
# JavaScript source that record() injects into the notebook front end.
# It defines three helpers:
#   sleep(time)  - promise that resolves after `time` milliseconds (setTimeout).
#   b2text(blob) - reads a Blob with FileReader.readAsDataURL and resolves with
#                  the resulting data-URL string.
#   record(time) - requests microphone access, records with MediaRecorder for
#                  `time` milliseconds, then resolves with the recording as a
#                  data-URL string.
# The string is evaluated in the browser, so its text must not be altered
# without also checking the Python side in record().
RECORD = """
const sleep = time => new Promise(resolve => setTimeout(resolve, time))
const b2text = blob => new Promise(resolve => {
const reader = new FileReader()
reader.onloadend = e => resolve(e.srcElement.result)
reader.readAsDataURL(blob)
})
var record = time => new Promise(async resolve => {
stream = await navigator.mediaDevices.getUserMedia({ audio: true })
recorder = new MediaRecorder(stream)
chunks = []
recorder.ondataavailable = e => chunks.push(e.data)
recorder.start()
await sleep(time)
recorder.onstop = async ()=>{
blob = new Blob(chunks)
text = await b2text(blob)
resolve(text)
}
recorder.stop()
})
"""
def record(sec=10):
    """Record microphone audio through the Colab front end and save it.

    The ``RECORD`` JavaScript helper is injected into the notebook page, asked
    to record for ``sec`` seconds, and the data URL it returns is base64
    decoded and written to ``recorded_audio.wav`` in the working directory.

    Args:
        sec: Recording length in seconds (converted to milliseconds for the
            JavaScript side).

    Returns:
        The name of the file that was written, or ``''`` when
        ``google.colab`` cannot be imported.
    """
    try:
        from google.colab import output as colab_output
    except ImportError:
        print('No possible to import output from google.colab')
        return ''

    print('Recording')
    display(Javascript(RECORD))
    duration_ms = sec * 1000
    data_url = colab_output.eval_js('record(%d)' % duration_ms)

    fname = 'recorded_audio.wav'
    print('Saving to', fname)
    # Everything after the first comma of the data URL is the base64 payload.
    # NOTE(review): the bytes are written as produced by the browser recorder;
    # presumably they are not WAV-encoded despite the file name — confirm.
    payload = data_url.split(',')[1]
    with open(fname, 'wb') as out_file:
        out_file.write(b64decode(payload))
    return fname
#@title Select how to input your audio { run: "auto" }
# Colab form field choosing where the audio comes from.
# NOTE(review): the form offers a URL and a Drive path as well, but the driver
# code further down only branches on 'RECORD' and 'UPLOAD'; any other value
# makes the script do nothing after the print below.
INPUT_SOURCE = 'RECORD' #@param ["https://storage.googleapis.com/download.tensorflow.org/data/c-scale-metronome.wav", "RECORD", "UPLOAD", "./drive/My Drive/YOUR_MUSIC_FILE.wav"] {allow-input: true}
# Maps a clip name to the sample array returned by librosa.load.
audio_vectors = {}
# Sample rate (Hz) passed to librosa.load when building audio_vectors.
sr = 44100
print('You selected', INPUT_SOURCE)
def convert_audio_for_model(user_file, output_file='converted_audio_file.wav'):
    """Re-encode an audio file as a single-channel WAV file.

    The frame rate is taken from the module-level ``EXPECTED_SAMPLE_RATE``,
    which therefore has to be defined before this function is called.

    Args:
        user_file: Path of the audio file to read.
        output_file: Path the converted WAV file is exported to.

    Returns:
        ``output_file``, unchanged.
    """
    segment = AudioSegment.from_file(user_file)
    resampled = segment.set_frame_rate(EXPECTED_SAMPLE_RATE)
    mono = resampled.set_channels(1)
    mono.export(output_file, format="wav")
    return output_file
# Driver: build audio features and a text encoding for the selected input,
# then print the combined emotion probabilities.
# NOTE(review): block indentation is missing in this copy of the file, so the
# nesting described in the comments below is inferred — confirm against the
# original notebook.
# NOTE(review): get_text_from_audio, get_feature_frame, get_features,
# save_features, apply_scaling, get_text_encoding and get_probs are defined
# further down in this file. Run top to bottom as a script, these calls would
# raise NameError; presumably the notebook cells were executed in another order.
if INPUT_SOURCE == 'RECORD':
# Record 7 seconds from the browser microphone.
uploaded_file_name = record(7)
# convert_audio_for_model() reads this global, so it must be set first.
EXPECTED_SAMPLE_RATE = 16000
converted_audio_file = convert_audio_for_model(uploaded_file_name)
# NOTE(review): sample_rate and audio_samples are not used again in the code
# visible here. In SciPy the second positional parameter of wavfile.read is
# `mmap`, not a file mode — confirm that passing 'rb' is intended.
sample_rate, audio_samples = wavfile.read(converted_audio_file, 'rb')
# Reload the converted file with librosa, resampled to the global `sr`.
orig_wav_vector, _sr = librosa.load(converted_audio_file, sr=sr)
orig_wav_file = "converted_audio_file"
audio_vectors[orig_wav_file] = orig_wav_vector
# Transcribe the recording, extract and persist audio features, scale them,
# encode the transcript and print the model probabilities.
text = get_text_from_audio()
df_features, columns = get_feature_frame()
df_features = get_features(audio_vectors, df_features, columns)
save_features(audio_vectors)
# NOTE(review): the scaled frame is not passed on; get_probs() re-reads the
# CSV that get_features() wrote before scaling — confirm this is intended.
df_features = apply_scaling(df_features)
encoding = get_text_encoding(text)
print(text)
probs = get_probs(encoding)
print(probs)
elif INPUT_SOURCE == 'UPLOAD':
# Process every file found in the Drive test directory.
wav_file_path = '/content/drive/MyDrive/CS626/test/'
orig_wav_files = os.listdir(wav_file_path)
for orig_wav_file in tqdm(orig_wav_files):
# try:
orig_wav_vector, _sr = librosa.load(wav_file_path + orig_wav_file, sr=sr)
# The two-name unpacking raises ValueError unless the file name contains
# exactly one dot.
orig_wav_file, file_format = orig_wav_file.split('.')
audio_vectors[orig_wav_file] = orig_wav_vector
# NOTE(review): whether the steps below run once per file or once after the
# loop cannot be determined from this copy — confirm.
df_features, columns = get_feature_frame()
df_features = get_features(audio_vectors, df_features, columns)
save_features(audio_vectors)
df_features = apply_scaling(df_features)
# The text input for this branch is the first line of sample_text.txt.
with open('/content/drive/MyDrive/CS626/test_pkl/sample_text.txt', 'r') as f:
text_example = f.readline()
encoding = get_text_encoding(text_example)
probs = get_probs(encoding)
print(probs)
# except:
# print('An exception occured for {}'.format(orig_wav_file))
"""# Get Text file"""
def get_text_from_audio(filename="converted_audio_file.wav"):
    """Transcribe an audio file with the Google recognizer of SpeechRecognition.

    Args:
        filename: Path of the audio file to transcribe. The default is the
            file name that was previously hard-coded here, so existing
            zero-argument calls behave as before.

    Returns:
        The recognized text. It is also printed to stdout.
    """
    recognizer = sp_r.Recognizer()
    with sp_r.AudioFile(filename) as source:
        # Read the whole file into memory as one audio segment.
        audio_data = recognizer.record(source)
    # NOTE(review): per the SpeechRecognition docs this call goes over the
    # network and raises UnknownValueError / RequestError on failure; those
    # errors are deliberately left to propagate, as in the original.
    text = recognizer.recognize_google(audio_data)
    print(text)
    return text
"""## Build Audio Vectors"""
def save_features(audio_vectors):
    """Pickle ``audio_vectors`` to the fixed ``audio_vector_1.pkl`` Drive path.

    Args:
        audio_vectors: Object to serialize (the driver passes the mapping of
            clip name to sample array).
    """
    target_path = '/content/drive/MyDrive/CS626/test_pkl/audio_vector_1.pkl'
    with open(target_path, 'wb') as sink:
        pickle.dump(audio_vectors, sink)
"""## Extract Audio Features"""
def get_feature_frame():
    """Create the empty audio-feature table and its column labels.

    Returns:
        A ``(frame, columns)`` tuple: an empty ``pandas.DataFrame`` with the
        feature columns, and the list of those column names.
    """
    columns = [
        'wav_file',
        'sig_mean',
        'sig_std',
        'rmse_mean',
        'rmse_std',
        'silence',
        'harmonic',
        'auto_corr_max',
        'auto_corr_std',
    ]
    empty_frame = pd.DataFrame(columns=columns)
    return empty_frame, columns
def _clip_features(wav_file_name, y):
    """Return the feature row (a list) for one clip's sample array."""
    y = np.asarray(y)
    sig_mean = np.mean(np.abs(y))

    # `y` must be passed by keyword: librosa >= 0.10 rejects it positionally.
    # The small offset is kept from the original implementation.
    rmse = librosa.feature.rms(y=y + 0.0001)[0]
    rmse_mean = np.mean(rmse)
    # Fraction of RMS frames at or below 40% of the mean RMS.
    silence = float(np.mean(rmse <= 0.4 * rmse_mean))

    y_harmonic = librosa.effects.hpss(y)[0]

    # Center clipping: shrink samples towards zero by `cl`, zeroing anything
    # whose magnitude is below the threshold.
    cl = 0.45 * sig_mean
    center_clipped = np.where(
        y >= cl, y - cl, np.where(y <= -cl, y + cl, 0.0)
    )
    auto_corrs = librosa.core.autocorrelate(center_clipped)

    return [
        wav_file_name,
        sig_mean,
        np.std(y),
        rmse_mean,
        np.std(rmse),
        silence,
        np.mean(y_harmonic) * 1000,
        1000 * np.max(auto_corrs) / len(auto_corrs),  # auto_corr_max (scaled by 1000)
        np.std(auto_corrs),  # auto_corr_std
    ]


def get_features(audio_vectors, df_features, columns,
                 output_csv='/content/drive/MyDrive/CS626/test_pkl/audio_features_1.csv'):
    """Compute signal statistics for every clip and add them to ``df_features``.

    One row is produced per entry of ``audio_vectors``: the clip name, mean
    absolute amplitude, amplitude standard deviation, mean and standard
    deviation of the RMS frames, the fraction of RMS frames at or below 40% of
    the mean RMS, the mean of the first ``librosa.effects.hpss`` output
    (x1000), and the scaled maximum and the standard deviation of the
    autocorrelation of the center-clipped signal.

    Clips whose processing raises are skipped after printing a message, so
    one bad clip does not abort the whole run.

    Args:
        audio_vectors: Mapping of clip name to sample array.
        df_features: Frame the new rows are added to.
        columns: Column labels for the new rows, in feature order.
        output_csv: Path the resulting frame is written to. The default is
            the location that was previously hard-coded.

    Returns:
        The frame containing the previous rows followed by the new ones.
    """
    rows = []
    for wav_file_name, samples in audio_vectors.items():
        try:
            rows.append(_clip_features(wav_file_name, samples))
        except Exception as exc:
            # Best-effort, as before, but report which clip failed and why
            # instead of hiding the cause behind a bare `except:`.
            print('Some exception occured')
            print('  clip: {!r}, error: {!r}'.format(wav_file_name, exc))

    if rows:
        new_rows = pd.DataFrame(rows, columns=columns)
        # DataFrame.append was removed in pandas 2.0; concatenate instead.
        # An empty starting frame is replaced outright so that its object
        # dtype does not leak into the numeric feature columns.
        if df_features.empty:
            df_features = new_rows
        else:
            df_features = pd.concat([df_features, new_rows], ignore_index=True)

    df_features.to_csv(output_csv, index=False)
    return df_features
"""## Prepare Data"""
def apply_scaling(df):
    """Transform every column except the first with the pickled scaler.

    The scaler object is unpickled from the fixed ``scalar.pkl`` Drive path
    and its ``transform`` method is applied to columns ``1:`` of ``df``. The
    frame is modified in place.

    Args:
        df: Feature frame whose first column is left untouched.

    Returns:
        The same ``df`` object, with the remaining columns replaced by the
        transformed values.
    """
    scaler_path = '/content/drive/MyDrive/CS626/test_pkl/scalar.pkl'
    with open(scaler_path, 'rb') as handle:
        scalar = pickle.load(handle)

    value_columns = df.columns[1:]
    df[value_columns] = scalar.transform(df[value_columns])
    return df
"""### Define preprocessing functions for text"""
def get_text_encoding(text_example):
    """Encode text with the ``bert-base-uncased`` SentenceTransformer model.

    The model is instantiated on every call.

    Args:
        text_example: Text passed to ``SentenceTransformer.encode``.

    Returns:
        The encoding reshaped to ``(-1, 768)``.
    """
    from sentence_transformers import SentenceTransformer

    model = SentenceTransformer('bert-base-uncased')
    embedding = model.encode(text_example)
    return embedding.reshape(-1, 768)
def _load_pickled_model(path):
    """Unpickle one trained classifier and close the file afterwards."""
    # NOTE(review): unpickling executes code embedded in the file; only load
    # model files from a trusted location.
    with open(path, 'rb') as model_file:
        return pickle.load(model_file)


def get_probs(x_test_text):
    """Sum the class probabilities of the four saved classifiers.

    The audio features are read back from ``audio_features_1.csv`` (all
    columns except the first), joined column-wise with ``x_test_text`` and
    passed to the XGB, MLP, RF and LR models. Their ``predict_proba`` outputs
    are added together and the highest-scoring emotion of each row is printed.

    Args:
        x_test_text: 2-D text-encoding array. It needs as many rows as the
            audio feature CSV, because the two are concatenated along axis 1.

    Returns:
        The element-wise sum of the four models' probability arrays.
    """
    x_test_audio = pd.read_csv('/content/drive/MyDrive/CS626/test_pkl/audio_features_1.csv')
    audio_values = np.array(x_test_audio[x_test_audio.columns[1:]])
    combined_x_test = np.concatenate((audio_values, x_test_text), axis=1)

    model_paths = (
        '/content/drive/MyDrive/CS626/iemocap/XGB_bert.pkl',
        '/content/drive/MyDrive/CS626/iemocap/MLP_bert.pkl',
        '/content/drive/MyDrive/CS626/iemocap/RF_bert.pkl',
        '/content/drive/MyDrive/CS626/iemocap/LR.pkl',
    )
    probs = None
    for model_path in model_paths:
        # Each model is queried with its own object. The original scored the
        # MLP a second time in place of the logistic-regression model.
        # NOTE(review): assumes LR.pkl was trained on the same combined
        # feature layout as the other three models — confirm.
        model_probs = _load_pickled_model(model_path).predict_proba(combined_x_test)
        probs = model_probs if probs is None else probs + model_probs

    emotion_dict = {0 : 'Angry',
                    1 : 'Happy',
                    2 : 'Sad',
                    3 : 'Fear',
                    4 : 'Surprised',
                    5 : 'Neutral'
                    }
    # argmax per row: a flattened argmax over several rows would produce an
    # index outside emotion_dict.
    for index in np.argmax(probs, axis=1):
        print("\n\nDetected Emotion : ", emotion_dict[int(index)])
    print("Emotion Dictionary : ", emotion_dict)
    return probs
\ No newline at end of file
Instructions for running the code :
1. Run feature_extraction file first.
python3 feature_extraction.py
2. Run combined_classification file to train the model.
python3 combined_classification.py
3. Run feature_extraction_for_test_file file to evaluate the model on a single file.
python3 feature_extraction_for_test_file.py
The dataset is hosted on drive.
Link : https://drive.google.com/drive/folders/12ELUg5aEfyd9BLcje6nzH10U5KwAA8tP?usp=sharing
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment