40 import imgaug.augmenters
as iaa
41 from PIL
import Image
as Image_
def st(x):
    """Wrap augmenter ``x`` so that it is applied with probability 0.3."""
    return iaa.Sometimes(0.3, x)


# Augmentation pipeline applied to every training image.
# NOTE(review): this section was recovered from a flattened listing in which
# the ``iaa.Affine(`` opener and the closing brackets were not visible.  Only
# the arguments that were visible are reproduced; confirm against the original
# file whether ``Affine`` or ``Sequential`` took further arguments (for
# example ``random_order``).
seq = iaa.Sequential([
    st(iaa.GaussianBlur(sigma=(0, 0.5))),
    st(iaa.ContrastNormalization((0.75, 1.5))),
    st(iaa.AdditiveGaussianNoise(
        loc=0, scale=(0.0, 0.05 * 255), per_channel=0.5)),
    st(iaa.Multiply((0.8, 1.2), per_channel=0.2)),
    # Scale and shift along x only; the y ranges are fixed at identity.
    st(iaa.Affine(
        scale={"x": (0.8, 1.2), "y": (1.0, 1.0)},
        translate_percent={"x": (-0.2, 0.2), "y": (0, 0)},
    )),
])
def split():
    """Build a train/test image dataset from per-class spectrogram folders.

    NOTE(review): the ``def`` line of this function was not visible in the
    recovered listing; the name ``split`` and the lines marked
    ``NOTE(review)`` below are reconstructions -- confirm them against the
    call in the ``__main__`` guard and the original file.

    Reads ``<path>/original_spectrogram/<class>/*.png``, colourises each
    image with ``img_jet``, resizes it to the input size of the selected
    model and writes the results into a freshly created ``<path>/dataset``:

    * ``train_*`` images, each saved ``--augment`` times after passing
      through the module-level ``seq`` augmenter,
    * ``test_*`` images, saved resized but un-augmented,
    * ``train_images.txt`` / ``test_images.txt`` with one
      ``"<file name> <class id>"`` line per saved image,
    * ``mean_of_dataset.png``, the per-pixel mean of all resized images.

    The sorted class names are also written to ``<path>/n_class.txt``.
    """
    rospack = rospkg.RosPack()

    parser = argparse.ArgumentParser()
    parser.add_argument('-r', '--rate', default=0.8, type=float,
                        help='train:test dataset rate (default 0.8:0.2)')
    parser.add_argument('-p', '--path',
                        default=osp.join(
                            rospack.get_path('sound_classification'),
                            'train_data'),
                        help='path to train data')
    parser.add_argument('-a', '--augment', default=1, type=int,
                        help='create {augment} images per 1 image')
    parser.add_argument('-m', '--model', type=str,
                        choices=['nin', 'vgg16'], default='nin',
                        help='Neural network model to use dataset')
    parser.add_argument('-n', '--number', default=100, type=int,
                        help='maximum number of images per class used to '
                             'create dataset')
    # Hidden optional positionals; presumably they absorb the
    # ``__name:=...`` / ``__log:=...`` arguments appended by roslaunch --
    # TODO confirm.
    parser.add_argument('__name:', help=argparse.SUPPRESS, nargs='?')
    parser.add_argument('__log:', help=argparse.SUPPRESS, nargs='?')

    args = parser.parse_args()

    # Input resolution expected by each supported network.
    image_sizes = {'nin': (227, 227), 'vgg16': (224, 224)}
    if args.model not in image_sizes:
        # Unreachable while ``choices`` above is in sync with the table;
        # kept so that a future mismatch fails loudly instead of continuing
        # with ``image_size`` undefined.
        parser.error('Model type {} is invalid.'.format(args.model))
    image_size = image_sizes[args.model]

    root_dir = osp.expanduser(args.path)
    origin_dir = osp.join(root_dir, 'original_spectrogram')
    dataset_dir = osp.join(root_dir, 'dataset')
    # NOTE(review): the initialisers of the two lists and of the image
    # counter were not visible in the recovered listing.
    image_list_train = []
    image_list_test = []
    mean_of_dataset = np.zeros(
        (image_size[0], image_size[1], 3), dtype=np.float32)
    size_of_dataset = 0

    # Always start from an empty output directory.
    if osp.exists(dataset_dir):
        shutil.rmtree(dataset_dir)
    os.mkdir(dataset_dir)

    # Only sub-directories are classes; a stray file next to them would
    # otherwise make the per-class os.listdir() below fail.
    classes = sorted(
        name for name in os.listdir(origin_dir)
        if osp.isdir(osp.join(origin_dir, name)))
    with open(osp.join(root_dir, 'n_class.txt'), mode='w') as f:
        f.writelines(class_name + '\n' for class_name in classes)

    for class_id, class_name in enumerate(classes):
        class_dir = osp.join(origin_dir, class_name)
        # Filter to PNG files *before* sampling: filtering afterwards let
        # non-image files use up sample slots and shifted the index that
        # decides the train/test split.  Sorting makes the sample
        # reproducible under a seeded RNG (os.listdir order is arbitrary).
        png_names = sorted(
            name for name in os.listdir(class_dir) if name.endswith('.png'))
        image_num_per_class = min(args.number, len(png_names))
        selected_names = random.sample(png_names, image_num_per_class)

        for i, file_name in enumerate(selected_names):
            saved_file_name = class_name + '_' + file_name
            # Close the source file as soon as its pixels are copied out.
            with Image_.open(osp.join(class_dir, file_name)) as src:
                pixels = np.array(src)
            # img_jet is a project helper; the [2, 1, 0] index reverses the
            # channel axis (presumably BGR -> RGB -- TODO confirm).
            img = Image_.fromarray(img_jet(pixels)[:, :, [2, 1, 0]])
            img_resize = img.resize((image_size[0], image_size[1]))
            mean_of_dataset += np.asarray(img_resize, dtype=np.float32)
            # NOTE(review): the increment was not visible in the listing;
            # it is placed next to the accumulation it normalises.
            size_of_dataset += 1

            if i < image_num_per_class * args.rate:
                saved_file_name = 'train_' + saved_file_name
                base, ext = osp.splitext(saved_file_name)
                for j in range(args.augment):
                    saved_file_name_augmented = (
                        base + '_{0:03d}'.format(j) + ext)
                    # A fresh array per pass keeps img_resize untouched
                    # whatever the augmenter does with its input.
                    img_aug = Image_.fromarray(
                        seq.augment_image(np.array(img_resize)))
                    img_aug.save(
                        osp.join(dataset_dir, saved_file_name_augmented))
                    image_list_train.append(
                        saved_file_name_augmented + ' ' + str(class_id) + '\n')
                    print('saved {}'.format(saved_file_name_augmented))
            else:
                saved_file_name = 'test_' + saved_file_name
                img_resize.save(osp.join(dataset_dir, saved_file_name))
                image_list_test.append(
                    saved_file_name + ' ' + str(class_id) + '\n')
                print('saved {}'.format(saved_file_name))

    # Index files: one "<file name> <class id>" line per saved image.
    with open(osp.join(dataset_dir, 'train_images.txt'), mode='w') as f:
        f.writelines(image_list_train)
    with open(osp.join(dataset_dir, 'test_images.txt'), mode='w') as f:
        f.writelines(image_list_test)

    # Without any image the mean would be a division by zero.
    if size_of_dataset == 0:
        print('No images found in {}; mean image not written.'.format(
            origin_dir))
        return

    saved_image = Image_.fromarray(
        (mean_of_dataset / size_of_dataset).astype(np.uint8))
    saved_image.save(osp.join(dataset_dir, 'mean_of_dataset.png'))
161 if __name__ ==
'__main__':