create_dataset.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 
4 # create dataset for training with chainer.
5 # some data augmentation is executed for training data (not for test data)
6 
# directory composition
# original_spectrogram - classA -- 001.png
#                      |        |- 002.png
#                      |        |- ...
#                      - classB -- 001.png
#                               |- 002.png
#                               |- ...
#
# -> (./create_dataset.py)
#
# original_spectrogram - classA -- 001.png
#                      |        |- 002.png
#                      |        |- ...
#                      - classB -- 001.png
#                               |- 002.png
#                               |- ...
#
# dataset              -- train_images.txt       # necessary for chainer
#                      |- test_images.txt        # necessary for chainer
#                      |- train_(class)000*.png
#                      |- ...
#                      |- test_(class)000*.png
#                      |- ...
#
# n_class.txt
32 #
33 # Total data number
34 # train: (number of images per class) * (train:test rate) * (augment number)
35 # test : (number of images per class) * (1 - (train:test rate))
36 
37 import argparse
39 import imgaug as ia
40 import imgaug.augmenters as iaa
41 from PIL import Image as Image_
42 import numpy as np
43 import os
44 import os.path as osp
45 import random
46 import rospkg
47 import shutil
48 
49 
# for data augmentation
# Fix the imgaug seed before any augmenter is created.
ia.seed(1)


def st(x):
    """Wrap augmenter ``x`` in ``iaa.Sometimes`` with p=0.3."""
    return iaa.Sometimes(0.3, x)


# Every augmenter below is wrapped by ``st`` and the wrapped augmenters are
# applied in random order. The affine step only scales and shifts along x;
# the y axis is left untouched (scale 1.0, translation 0).
seq = iaa.Sequential(
    [
        st(iaa.GaussianBlur(sigma=(0, 0.5))),
        st(iaa.ContrastNormalization((0.75, 1.5))),
        st(iaa.AdditiveGaussianNoise(
            loc=0, scale=(0.0, 0.05*255), per_channel=0.5)),
        st(iaa.Multiply((0.8, 1.2), per_channel=0.2)),
        st(iaa.Affine(
            scale={"x": (0.8, 1.2), "y": (1.0, 1.0)},
            translate_percent={"x": (-0.2, 0.2), "y": (0, 0)},
        )),
    ],
    random_order=True,  # apply augmenters in random order
)

# Used to resolve the default --path from the ROS package location.
rospack = rospkg.RosPack()
65 
66 
def _build_arg_parser():
    """Return the command-line parser used by ``split``."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-r', '--rate', default=0.8, type=float,
                        help='train:test dataset rate (default 0.8:0.2)')
    parser.add_argument('-p', '--path',
                        default=osp.join(
                            rospack.get_path('sound_classification'),
                            'train_data'),
                        help='path to train data')
    parser.add_argument('-a', '--augment', default=1, type=int,
                        help='create {augment} images per 1 image')
    parser.add_argument('-m', '--model', type=str,
                        choices=['nin', 'vgg16'], default='nin',
                        help='Neural network model to use dataset')
    parser.add_argument('-n', '--number', default=100, type=int,
                        help='maximum number of images per class used to '
                             'create dataset')
    # Ignore arguments sent by roslaunch.
    parser.add_argument('__name:', help=argparse.SUPPRESS, nargs='?')
    parser.add_argument('__log:', help=argparse.SUPPRESS, nargs='?')
    return parser


# Split dataset into train data and test data. The rate is given by --rate.
def split():
    """Create the train/test image dataset from ``original_spectrogram``.

    Reads ``<path>/original_spectrogram/<class>/*.png``, converts every
    selected image with ``img_jet``, resizes it to the input size of the
    chosen model and writes into ``<path>/dataset``:

    * ``train_<class>_<name>_NNN.png`` -- ``--augment`` augmented copies of
      each training image,
    * ``test_<class>_<name>.png`` -- resized, non-augmented test images,
    * ``train_images.txt`` / ``test_images.txt`` -- ``"<file> <class_id>"``
      lines,
    * ``mean_of_dataset.png`` -- per-pixel mean of all resized images.

    ``<path>/n_class.txt`` receives one class name per line. An existing
    ``dataset`` directory is deleted first.

    Only ``.png`` files are candidates for sampling and only
    sub-directories of ``original_spectrogram`` count as classes, so stray
    files neither consume sample slots nor distort the train:test ratio.
    """
    parser = _build_arg_parser()
    args = parser.parse_args()
    rate = args.rate

    # argparse ``choices`` guarantees that args.model is one of these keys.
    image_size = {'nin': (227, 227), 'vgg16': (224, 224)}[args.model]
    width, height = image_size

    root_dir = osp.expanduser(args.path)
    origin_dir = osp.join(root_dir, 'original_spectrogram')
    dataset_dir = osp.join(root_dir, 'dataset')
    # Fail before the old dataset is deleted when there is nothing to read.
    if not osp.isdir(origin_dir):
        parser.error('{} is not a directory.'.format(origin_dir))

    image_list_train = []
    image_list_test = []
    # PIL sizes are (width, height); image arrays are (height, width, 3).
    mean_of_dataset = np.zeros((height, width, 3), dtype=np.float32)
    size_of_dataset = 0

    if osp.exists(dataset_dir):
        shutil.rmtree(dataset_dir)
    os.mkdir(dataset_dir)

    # write how many classes
    classes = sorted(
        name for name in os.listdir(origin_dir)
        if osp.isdir(osp.join(origin_dir, name)))
    with open(osp.join(root_dir, 'n_class.txt'), mode='w') as f:
        for class_name in classes:
            f.write(class_name + '\n')

    for class_id, class_name in enumerate(classes):
        class_dir = osp.join(origin_dir, class_name)
        # Filter before sampling so that every sampled entry is an image
        # and the index used for the train/test split is contiguous.
        png_names = sorted(
            name for name in os.listdir(class_dir) if name.endswith('.png'))
        image_num_per_class = min(args.number, len(png_names))
        selected_names = random.sample(png_names, image_num_per_class)

        # copy train and test data
        # resize and augment data (multiple args.augment times)
        for i, file_name in enumerate(selected_names):
            saved_file_name = class_name + '_' + file_name
            img = Image_.open(osp.join(class_dir, file_name))
            img = img_jet(np.asarray(img))[:, :, [2, 1, 0]]  # bgr -> rgb
            img_resize = Image_.fromarray(img).resize(image_size)
            mean_of_dataset += np.asarray(img_resize, dtype=np.float32)
            size_of_dataset += 1

            if i < image_num_per_class * rate:  # save data for train
                base, ext = osp.splitext('train_' + saved_file_name)
                for j in range(args.augment):
                    saved_file_name_augmented = \
                        base + '_{0:03d}'.format(j) + ext
                    img_aug = Image_.fromarray(
                        seq.augment_image(np.array(img_resize)))
                    img_aug.save(
                        osp.join(dataset_dir, saved_file_name_augmented))
                    image_list_train.append(
                        saved_file_name_augmented + ' ' + str(class_id) + '\n')
                    print('saved {}'.format(saved_file_name_augmented))
            else:  # save data for test
                saved_file_name = 'test_' + saved_file_name
                img_resize.save(osp.join(dataset_dir, saved_file_name))
                image_list_test.append(
                    saved_file_name + ' ' + str(class_id) + '\n')
                print('saved {}'.format(saved_file_name))

    # create images.txt
    # for train
    with open(osp.join(dataset_dir, 'train_images.txt'), mode='w') as f:
        f.writelines(image_list_train)
    # for test
    with open(osp.join(dataset_dir, 'test_images.txt'), mode='w') as f:
        f.writelines(image_list_test)

    # save mean value of dataset
    if size_of_dataset == 0:
        # Avoid 0/0: without any image there is no meaningful mean to save.
        print('No .png image found under {}; mean_of_dataset.png is not '
              'created.'.format(origin_dir))
        return
    saved_image = Image_.fromarray(
        (mean_of_dataset / size_of_dataset).astype(np.uint8))
    saved_image.save(osp.join(dataset_dir, 'mean_of_dataset.png'))
159 
160 
# Build the dataset only when this file is executed as a script.
if __name__ == '__main__':
    split()
sound_classification.process_gray_image
Definition: process_gray_image.py:1
sound_classification.process_gray_image.img_jet
def img_jet(img)
Definition: process_gray_image.py:39
create_dataset.split
def split()
Definition: create_dataset.py:68
create_dataset.st
st
Definition: create_dataset.py:52


sound_classification
Author(s):
autogenerated on Fri May 16 2025 03:12:55