Commit fe527704 authored by Smit Gangurde's avatar Smit Gangurde

Updated readme

parent c26cf5de
#Ignore Dataset folders, too big for git
#Smaller dataset, Version 1
dataset1/
#Larger dataset, Version 2
dataset2/
#Ignore python cache
__pycache__/
#Add any other local/ide files that don't belong in git
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Dataset1.ipynb",
"provenance": [],
"collapsed_sections": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "code",
"metadata": {
"id": "-jen3DITEVt2",
"outputId": "f74b2601-aca6-4b79-8042-a46066957545",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 86
}
},
"source": [
"#############Setup dataset#############\n",
"!pip install gdown --quiet\n",
"!gdown --id 1zTI002FEm0BcbXlUFpLs5zvurWGnx68v #id for dataset1.tar.gz\n",
"!tar -zxf dataset1.tar.gz\n",
"!rm dataset1.tar.gz\n",
"!rm -r sample_data #remove the default colab sample_data"
],
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"text": [
"Downloading...\n",
"From: https://drive.google.com/uc?id=1zTI002FEm0BcbXlUFpLs5zvurWGnx68v\n",
"To: /content/dataset1.tar.gz\n",
"\r0.00B [00:00, ?B/s]\r19.4MB [00:00, 190MB/s]\r27.7MB [00:00, 169MB/s]\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "wr6oah1TGi9R"
},
"source": [
"#some global variables\n",
"#Change any variable as required\n",
"dataset_dir = 'dataset1/'\n",
"train_dir = 'train/'\n",
"test_dir = 'test/'\n",
"train_csv = 'train.csv'\n",
"test_csv = 'test.csv'\n",
"###########################Image Variables###########################\n",
"std_ht = 224 #standard height -> height we want all images to have\n",
"std_wd = 224 #standard width -> width we want all images to have\n",
"#opencv image numpy array format -> (height, width, channels)"
],
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "VzS-iaUkFqUJ",
"outputId": "b546a1f4-8f7d-45bf-f240-5b85a922b648",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 69
}
},
"source": [
"#Update versions if required\n",
"import cv2\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"print(\"opencv version: {}\".format(cv2.__version__))\n",
"print(\"numpy version: {}\".format(np.__version__))\n",
"print(\"pandas version: {}\".format(pd.__version__))"
],
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"text": [
"opencv version: 4.1.2\n",
"numpy version: 1.18.5\n",
"pandas version: 1.1.3\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "5P_niPzuHx8l"
},
"source": [
"#############Preprocessing#############\n",
"#Change the function as required\n",
"#Maybe if you are more comfortable with passing final image sizes\n",
"#as arguments, do that, Currently I have kept it as a global variable\n",
"#Or maybe you prefer, numpy array of 4 shapes instead of list of img numpy arrays\n",
"def preprocess_imgs(img_dir, csv_file, isTrain=False):\n",
" '''\n",
" Resizes the images to std_ht and std_wt\n",
" Order of iteration over images is same as the csv file\n",
" Returns list of resized imgs(numpy arrays), and list of corresponding labels if isTrain=True\n",
" Else returns a list of resized imgs(numpy arrays) \n",
" '''\n",
" dir_path = dataset_dir + img_dir\n",
" csv_df = pd.read_csv(dataset_dir + csv_file)\n",
" if isTrain: labels=[]\n",
" resized_imgs=[]\n",
" for i in range(len(csv_df)):\n",
" temp = cv2.imread(dir_path+csv_df['Image'][i])\n",
" #cv2.resize takes final size as (width, height)\n",
" #Change cv2.INTER_CUBIC to any other interpolation, as required\n",
" resized_imgs.append(cv2.resize(temp, dsize=(std_wd, std_ht), interpolation=cv2.INTER_CUBIC))\n",
" if isTrain: labels.append(csv_df['target'][i])\n",
" return (resized_imgs, labels) if isTrain else (resized_imgs)"
],
"execution_count": 4,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "nKEaLC9fH_Sd"
},
"source": [
"train_imgs, train_labels = preprocess_imgs(train_dir, train_csv, True)\n",
"test_imgs = preprocess_imgs(test_dir, test_csv, False)"
],
"execution_count": 5,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "-M60cZUviDvO",
"outputId": "bdd8586c-8c19-4b14-8df9-2c13bd9cefe2",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
}
},
"source": [
"#test\n",
"print(train_imgs[0].shape, train_labels[0])\n",
"#For further testing, you can save any img array as an image and check dimensions\n",
"#eg: cv2.imwrite('test.jpg', train_imgs[3])"
],
"execution_count": 6,
"outputs": [
{
"output_type": "stream",
"text": [
"(224, 224, 3) manipuri\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "gx9_F78BszSW"
},
"source": [
""
],
"execution_count": null,
"outputs": []
}
]
}
\ No newline at end of file
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Dataset2.ipynb",
"provenance": [],
"collapsed_sections": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "code",
"metadata": {
"id": "LkfJiyyjjfwW",
"outputId": "0914f216-8efe-496f-abf7-30f2fb729d20",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 86
}
},
"source": [
"#############Setup dataset#############\n",
"!pip install gdown --quiet\n",
"!gdown --id 1I29xdtdJVd7FoT7Uyw1QHafKyRzsqPzK #id for dataset2.tar.gz\n",
"!tar -zxf dataset2.tar.gz\n",
"!rm dataset2.tar.gz\n",
"!rm -r sample_data #remove the default colab sample_data"
],
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"text": [
"Downloading...\n",
"From: https://drive.google.com/uc?id=1I29xdtdJVd7FoT7Uyw1QHafKyRzsqPzK\n",
"To: /content/dataset2.tar.gz\n",
"235MB [00:01, 166MB/s]\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "yRNAzxroj4hW"
},
"source": [
"#some global variables\n",
"#change as required\n",
"dataset_dir = 'dataset2/'\n",
"train_dir = 'train/'\n",
"test_dir = 'test/'\n",
"validation_dir = 'validation/'\n",
"train_csv = 'train.csv'\n",
"test_csv = 'test.csv'\n",
"###########################Image Variables###########################\n",
"std_ht = 224 #standard height -> height we want all images to have\n",
"std_wd = 224 #standard width -> width we want all images to have\n",
"#opencv image numpy array format -> (height, width, channels)"
],
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "6ybstahmkxTB",
"outputId": "4ad3d21b-e4ad-4371-ce6a-d879aa167cd0",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 69
}
},
"source": [
"#Update versions as required\n",
"import os\n",
"import cv2\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"print(\"opencv version: {}\".format(cv2.__version__))\n",
"print(\"numpy version: {}\".format(np.__version__))\n",
"print(\"pandas version: {}\".format(pd.__version__))"
],
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"text": [
"opencv version: 4.1.2\n",
"numpy version: 1.18.5\n",
"pandas version: 1.1.3\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "C6xUzHwMlBD7"
},
"source": [
"#############Preprocessing#############\n",
"#Change the function as required\n",
"#Maybe if you are more comfortable with passing final image sizes\n",
"#as arguments, do that, Currently I have kept it as a global variable\n",
"#Or maybe you prefer, numpy array of 4 shapes instead of list of img numpy arrays\n",
"def preprocess_imgs(img_dir, isTrain=False):\n",
" '''\n",
" Resizes the images to std_ht and std_wt\n",
" Order of iteration over images is same os.listdir()\n",
" Returns list of resized imgs(numpy arrays), and list of corresponding labels if isTrain=True\n",
" Else returns a list of resized imgs(numpy arrays) \n",
" '''\n",
" dir_path = dataset_dir + img_dir\n",
" if isTrain: labels = []\n",
" resized_imgs = []\n",
" if isTrain:\n",
" label_dirs = os.listdir(dataset_dir+img_dir)\n",
" for label in label_dirs:\n",
" img_list = os.listdir(dataset_dir + img_dir + label + '/')\n",
" for img in img_list:\n",
" #sloppy code, can chdir, but meh\n",
" temp = cv2.imread(dataset_dir + img_dir + label + '/' + img)\n",
" #cv2.resize takes final size as (width, height)\n",
" #Change cv2.INTER_CUBIC to any other interpolation, as required\n",
" resized_imgs.append(cv2.resize(temp, dsize=(std_wd, std_ht), interpolation=cv2.INTER_CUBIC))\n",
" labels.append(label)\n",
" return (resized_imgs, labels)\n",
" else:\n",
" img_list = os.listdir(dataset_dir + img_dir)\n",
" for img in img_list:\n",
" temp = cv2.imread(dataset_dir + img_dir + img)\n",
" #cv2.resize takes final size as (width, height)\n",
" #Change cv2.INTER_CUBIC to any other interpolation, as required\n",
" resized_imgs.append(cv2.resize(temp, dsize=(std_wd, std_ht), interpolation=cv2.INTER_CUBIC))\n",
" return resized_imgs"
],
"execution_count": 4,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "Cn60kFfNqc2K"
},
"source": [
"train_imgs, train_labels = preprocess_imgs(train_dir, True)\n",
"dev_imgs, dev_labels = preprocess_imgs(validation_dir, True)\n",
"test_imgs = preprocess_imgs(test_dir, False)"
],
"execution_count": 5,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "hCQMlxehquo3",
"outputId": "5265b283-eda5-406b-99b3-0d15f1b1c1c6",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 69
}
},
"source": [
"#test\n",
"print(train_imgs[0].shape, train_labels[0], len(train_imgs), len(train_labels))\n",
"print(dev_imgs[0].shape, dev_labels[0], len(dev_imgs), len(dev_labels))\n",
"print(test_imgs[0].shape, len(test_imgs))\n",
"#For further validation you can save any img array as image and check the dimensions\n",
"#eg: cv2.imwrite('test.jpg', train_imgs[300])\n"
],
"execution_count": 6,
"outputs": [
{
"output_type": "stream",
"text": [
"(224, 224, 3) sattriya 5000 5000\n",
"(224, 224, 3) sattriya 364 364\n",
"(224, 224, 3) 156\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "03qTDNSesynZ"
},
"source": [
""
],
"execution_count": null,
"outputs": []
}
]
}
\ No newline at end of file
# Indian Classical Dance Classification from Dance Poses
### Authors:
Aarushi Aiyyar : 203050045
Bhavesh Yadav : 193050052
Khyati Oswal : 203050058
Raj Gite : 203050092
Smit Gangurde : 203050108
Yavnika Bhagat : 203050041
### Problem Statement:
Identify the type of Indian dance form from a dance pose image. Use multiple techniques to classify the images and compare the various techniques.
### Dataset:
https://www.kaggle.com/somnath796/indian-dance-form-recognition
Train Images: 364 | Test Images: 156
Number of classes: 8
### Techniques used and their test accuracies:
|Implementation |Test Accuracy |
|--|--|
|Custom CNN model|46.79%|
|VGG16 | 48% |
|ResNet152|77.56%|
Implemented Cross Validation.
#### Conclusion: ResNet152 performed the best.
### Running the code:
Python Notebooks along with the necessary code are provided in the repository, in the folder 'Notebooks/'.
1. Custom CNN notebook:
Open the notebook, import the 'helpers' directory and run the cells.
2. VGG16:
Open the notebook, and run the cells.
3. ResNet152 using Monk:
i. You can directly download the ipynb file and run it on kaggle
ii. Upload dataset with appropriate data structure
iii. Download the uploaded .py file, according to the numbers given in the file, execute commands on Kaggle notebook.
### References:
1. https://www.kaggle.com/singhuday/identifythedanceform/version/1
2. https://github.com/Tessellate-Imaging/monk_v1
3. https://clever-noyce-f9d43f.netlify.app/#/introduction
4. https://www.youtube.com/watch?v=zFA8Cm13Xmk
5. https://medium.com/@dtuk81/confusion-matrix-visualization-fc31e3f30fea
### Reference paper links:
1. https://arxiv.org/pdf/1812.05231.pdf
2. https://www.isical.ac.in/~vlrg/sites/default/files/Pulak/wacv2012.pdf
3. https://ieeexplore.ieee.org/document/9182365
**************README**************
TEAM:
Aarushi Aiyyer - 203050045
Bhavesh Yadav - 193050052
Khyati Oswal - 203050058
Raj Gite - 203050092
Smit Gangurde - 203050108
Yavnika Bhagat - 203050041
Problem Statement:
Identify the type of Indian dance form from a dance pose image
Use multiple techniques to classify the images and compare them
Dataset Links:
1) https://www.kaggle.com/somnath796/indian-dance-form-recognition 27MB
Conventions:
- dataset1/ : Dataset Version 1
Paper Links:
1) https://arxiv.org/pdf/1812.05231.pdf (IIT Delhi)
2) https://www.isical.ac.in/~vlrg/sites/default/files/Pulak/wacv2012.pdf (ISI)
3) https://ieeexplore.ieee.org/document/9182365
Helpful reference code:
1)https://www.kaggle.com/singhuday/identifythedanceform/version/1
2)https://github.com/Tessellate-Imaging/monk_v1
3)https://clever-noyce-f9d43f.netlify.app/#/introduction
4)https://www.youtube.com/watch?v=zFA8Cm13Xmk
5)https://medium.com/@dtuk81/confusion-matrix-visualization-fc31e3f30fea
Added Dataset Version 1 to Google Drive: https://drive.google.com/file/d/1zTI002FEm0BcbXlUFpLs5zvurWGnx68v/view?usp=sharing
The Repository contains 3 notebooks:
1. Custom CNN
2. VGG16
3. RESNET152
Execution:
For RESNET152usingMonk:
1. You can directly download the ipynb file and run it on Kaggle.
2. Upload dataset with appropriate data structure.
3. Download the uploaded .py file, according to the numbers given in the file, execute commands on Kaggle notebook.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment