Smit Gangurde / CS725_Project / Commits

Commit 6bc2fe0d authored Oct 27, 2020 by Smit Gangurde
Added notebooks for preprocessing(resizing imgs)
parent 061123c6
Showing 2 changed files with 391 additions and 0 deletions
Notebooks/Dataset1_preprocess.ipynb  +186 -0
Notebooks/Dataset2_preprocess.ipynb  +205 -0
Notebooks/Dataset1_preprocess.ipynb  0 → 100644
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Dataset1.ipynb",
"provenance": [],
"collapsed_sections": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "code",
"metadata": {
"id": "-jen3DITEVt2",
"outputId": "f74b2601-aca6-4b79-8042-a46066957545",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 86
}
},
"source": [
"#############Setup dataset#############\n",
"!pip install gdown --quiet\n",
"!gdown --id 1zTI002FEm0BcbXlUFpLs5zvurWGnx68v #id for dataset1.tar.gz\n",
"!tar -zxf dataset1.tar.gz\n",
"!rm dataset1.tar.gz\n",
"!rm -r sample_data #remove the default colab sample_data"
],
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"text": [
"Downloading...\n",
"From: https://drive.google.com/uc?id=1zTI002FEm0BcbXlUFpLs5zvurWGnx68v\n",
"To: /content/dataset1.tar.gz\n",
"\r0.00B [00:00, ?B/s]\r19.4MB [00:00, 190MB/s]\r27.7MB [00:00, 169MB/s]\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "wr6oah1TGi9R"
},
"source": [
"#some global variables\n",
"#Change any variable as required\n",
"dataset_dir = 'dataset1/'\n",
"train_dir = 'train/'\n",
"test_dir = 'test/'\n",
"train_csv = 'train.csv'\n",
"test_csv = 'test.csv'\n",
"###########################Image Variables###########################\n",
"std_ht = 224 #standard height -> height we want all images to have\n",
"std_wd = 224 #standard width -> width we want all images to have\n",
"#opencv image numpy array format -> (height, width, channels)"
],
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "VzS-iaUkFqUJ",
"outputId": "b546a1f4-8f7d-45bf-f240-5b85a922b648",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 69
}
},
"source": [
"#Update versions if required\n",
"import cv2\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"print(\"opencv version: {}\".format(cv2.__version__))\n",
"print(\"numpy version: {}\".format(np.__version__))\n",
"print(\"pandas version: {}\".format(pd.__version__))"
],
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"text": [
"opencv version: 4.1.2\n",
"numpy version: 1.18.5\n",
"pandas version: 1.1.3\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "5P_niPzuHx8l"
},
"source": [
"#############Preprocessing#############\n",
"#Change the function as required\n",
"#Maybe if you are more comfortable with passing final image sizes\n",
"#as arguments, do that, Currently I have kept it as a global variable\n",
"#Or maybe you prefer, numpy array of 4 shapes instead of list of img numpy arrays\n",
"def preprocess_imgs(img_dir, csv_file, isTrain=False):\n",
" '''\n",
" Resizes the images to std_ht and std_wt\n",
" Order of iteration over images is same as the csv file\n",
" Returns list of resized imgs(numpy arrays), and list of corresponding labels if isTrain=True\n",
" Else returns a list of resized imgs(numpy arrays) \n",
" '''\n",
" dir_path = dataset_dir + img_dir\n",
" csv_df = pd.read_csv(dataset_dir + csv_file)\n",
" if isTrain: labels=[]\n",
" resized_imgs=[]\n",
" for i in range(len(csv_df)):\n",
" temp = cv2.imread(dir_path+csv_df['Image'][i])\n",
" #cv2.resize takes final size as (width, height)\n",
" #Change cv2.INTER_CUBIC to any other interpolation, as required\n",
" resized_imgs.append(cv2.resize(temp, dsize=(std_wd, std_ht), interpolation=cv2.INTER_CUBIC))\n",
" if isTrain: labels.append(csv_df['target'][i])\n",
" return (resized_imgs, labels) if isTrain else (resized_imgs)"
],
"execution_count": 4,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "nKEaLC9fH_Sd"
},
"source": [
"train_imgs, train_labels = preprocess_imgs(train_dir, train_csv, True)\n",
"test_imgs = preprocess_imgs(test_dir, test_csv, False)"
],
"execution_count": 5,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "-M60cZUviDvO",
"outputId": "bdd8586c-8c19-4b14-8df9-2c13bd9cefe2",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
}
},
"source": [
"#test\n",
"print(train_imgs[0].shape, train_labels[0])\n",
"#For further testing, you can save any img array as an image and check dimensions\n",
"#eg: cv2.imwrite('test.jpg', train_imgs[3])"
],
"execution_count": 6,
"outputs": [
{
"output_type": "stream",
"text": [
"(224, 224, 3) manipuri\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "gx9_F78BszSW"
},
"source": [
""
],
"execution_count": null,
"outputs": []
}
]
}
\ No newline at end of file
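
The function comments in Dataset1_preprocess.ipynb suggest that preprocess_imgs could return a single 4-dimensional numpy array instead of a list of per-image arrays. A minimal sketch of that conversion, assuming the train_imgs and train_labels lists produced by the cells above:

import numpy as np

# Stack the list of (std_ht, std_wd, 3) arrays into a single
# (num_images, std_ht, std_wd, 3) array; this works because
# preprocess_imgs resizes every image to the same shape.
train_x = np.stack(train_imgs)        # e.g. (N, 224, 224, 3)
train_y = np.asarray(train_labels)    # e.g. (N,)
print(train_x.shape, train_y.shape)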
Notebooks/Dataset2_preprocess.ipynb  0 → 100644
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Dataset2.ipynb",
"provenance": [],
"collapsed_sections": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "code",
"metadata": {
"id": "LkfJiyyjjfwW",
"outputId": "0914f216-8efe-496f-abf7-30f2fb729d20",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 86
}
},
"source": [
"#############Setup dataset#############\n",
"!pip install gdown --quiet\n",
"!gdown --id 1I29xdtdJVd7FoT7Uyw1QHafKyRzsqPzK #id for dataset2.tar.gz\n",
"!tar -zxf dataset2.tar.gz\n",
"!rm dataset2.tar.gz\n",
"!rm -r sample_data #remove the default colab sample_data"
],
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"text": [
"Downloading...\n",
"From: https://drive.google.com/uc?id=1I29xdtdJVd7FoT7Uyw1QHafKyRzsqPzK\n",
"To: /content/dataset2.tar.gz\n",
"235MB [00:01, 166MB/s]\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "yRNAzxroj4hW"
},
"source": [
"#some global variables\n",
"#change as required\n",
"dataset_dir = 'dataset2/'\n",
"train_dir = 'train/'\n",
"test_dir = 'test/'\n",
"validation_dir = 'validation/'\n",
"train_csv = 'train.csv'\n",
"test_csv = 'test.csv'\n",
"###########################Image Variables###########################\n",
"std_ht = 224 #standard height -> height we want all images to have\n",
"std_wd = 224 #standard width -> width we want all images to have\n",
"#opencv image numpy array format -> (height, width, channels)"
],
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "6ybstahmkxTB",
"outputId": "4ad3d21b-e4ad-4371-ce6a-d879aa167cd0",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 69
}
},
"source": [
"#Update versions as required\n",
"import os\n",
"import cv2\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"print(\"opencv version: {}\".format(cv2.__version__))\n",
"print(\"numpy version: {}\".format(np.__version__))\n",
"print(\"pandas version: {}\".format(pd.__version__))"
],
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"text": [
"opencv version: 4.1.2\n",
"numpy version: 1.18.5\n",
"pandas version: 1.1.3\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "C6xUzHwMlBD7"
},
"source": [
"#############Preprocessing#############\n",
"#Change the function as required\n",
"#Maybe if you are more comfortable with passing final image sizes\n",
"#as arguments, do that, Currently I have kept it as a global variable\n",
"#Or maybe you prefer, numpy array of 4 shapes instead of list of img numpy arrays\n",
"def preprocess_imgs(img_dir, isTrain=False):\n",
" '''\n",
" Resizes the images to std_ht and std_wt\n",
" Order of iteration over images is same os.listdir()\n",
" Returns list of resized imgs(numpy arrays), and list of corresponding labels if isTrain=True\n",
" Else returns a list of resized imgs(numpy arrays) \n",
" '''\n",
" dir_path = dataset_dir + img_dir\n",
" if isTrain: labels = []\n",
" resized_imgs = []\n",
" if isTrain:\n",
" label_dirs = os.listdir(dataset_dir+img_dir)\n",
" for label in label_dirs:\n",
" img_list = os.listdir(dataset_dir + img_dir + label + '/')\n",
" for img in img_list:\n",
" #sloppy code, can chdir, but meh\n",
" temp = cv2.imread(dataset_dir + img_dir + label + '/' + img)\n",
" #cv2.resize takes final size as (width, height)\n",
" #Change cv2.INTER_CUBIC to any other interpolation, as required\n",
" resized_imgs.append(cv2.resize(temp, dsize=(std_wd, std_ht), interpolation=cv2.INTER_CUBIC))\n",
" labels.append(label)\n",
" return (resized_imgs, labels)\n",
" else:\n",
" img_list = os.listdir(dataset_dir + img_dir)\n",
" for img in img_list:\n",
" temp = cv2.imread(dataset_dir + img_dir + img)\n",
" #cv2.resize takes final size as (width, height)\n",
" #Change cv2.INTER_CUBIC to any other interpolation, as required\n",
" resized_imgs.append(cv2.resize(temp, dsize=(std_wd, std_ht), interpolation=cv2.INTER_CUBIC))\n",
" return resized_imgs"
],
"execution_count": 4,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "Cn60kFfNqc2K"
},
"source": [
"train_imgs, train_labels = preprocess_imgs(train_dir, True)\n",
"dev_imgs, dev_labels = preprocess_imgs(validation_dir, True)\n",
"test_imgs = preprocess_imgs(test_dir, False)"
],
"execution_count": 5,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "hCQMlxehquo3",
"outputId": "5265b283-eda5-406b-99b3-0d15f1b1c1c6",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 69
}
},
"source": [
"#test\n",
"print(train_imgs[0].shape, train_labels[0], len(train_imgs), len(train_labels))\n",
"print(dev_imgs[0].shape, dev_labels[0], len(dev_imgs), len(dev_labels))\n",
"print(test_imgs[0].shape, len(test_imgs))\n",
"#For further validation you can save any img array as image and check the dimensions\n",
"#eg: cv2.imwrite('test.jpg', train_imgs[300])\n"
],
"execution_count": 6,
"outputs": [
{
"output_type": "stream",
"text": [
"(224, 224, 3) sattriya 5000 5000\n",
"(224, 224, 3) sattriya 364 364\n",
"(224, 224, 3) 156\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "03qTDNSesynZ"
},
"source": [
""
],
"execution_count": null,
"outputs": []
}
]
}
\ No newline at end of file
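
The path-building comment in Dataset2's preprocess_imgs notes that os.path.join would be cleaner than manual string concatenation. A minimal sketch of the same per-label walk written that way, assuming the dataset_dir, train_dir, std_ht, and std_wd globals defined in the notebook above:

import os
import cv2

# Same per-label iteration as preprocess_imgs, but using
# os.path.join instead of '/' concatenation; behaviour is unchanged.
resized_imgs, labels = [], []
train_path = os.path.join(dataset_dir, train_dir)
for label in os.listdir(train_path):
    label_path = os.path.join(train_path, label)
    for img_name in os.listdir(label_path):
        img = cv2.imread(os.path.join(label_path, img_name))
        resized_imgs.append(cv2.resize(img, dsize=(std_wd, std_ht), interpolation=cv2.INTER_CUBIC))
        labels.append(label)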