00001
00002
"""
Fit a Bag of Features (BoF) extractor and create a BoF histogram dataset.
"""
00006
00007 import gzip
00008 import cPickle as pickle
00009 import argparse
00010
00011 import numpy as np
00012 from sklearn.datasets.base import Bunch
00013 from sklearn.preprocessing import normalize
00014 from jsk_recognition_utils import BagOfFeatures
00015
00016
def cmd_extract_bof(data_path, output, data_size=1):
    """Fit a BagOfFeatures extractor on a random subset of the descriptors.

    Loads the ``'descriptors'`` entry of the gzip-compressed pickle at
    ``data_path``, picks ``int(data_size * N)`` of its ``N`` entries at
    random (each entry at most once), reshapes every picked entry to rows of
    128 values, stacks them into one array, fits a ``BagOfFeatures``
    instance on that array and pickles the fitted instance
    (gzip-compressed) to ``output``.

    Args:
        data_path: Path of a gzip-compressed pickle holding a mapping with
            a ``'descriptors'`` entry.
        output: Path the fitted extractor is written to.
        data_size: Fraction of the descriptor entries used for fitting;
            must satisfy ``0 < data_size <= 1``.

    Raises:
        ValueError: If ``data_size`` is out of range or selects no entry.
        MemoryError: Re-raised (after printing the sample size) when
            fitting runs out of memory, so that an unfitted extractor is
            never written to ``output``.
    """
    if not 0 < data_size <= 1:
        raise ValueError(
            'data_size must be in (0, 1], got {}'.format(data_size))

    print('loading data')
    # NOTE: unpickling can execute arbitrary code; only load files that
    # come from a trusted source.
    with gzip.open(data_path, 'rb') as f:
        descs = pickle.load(f)['descriptors']

    n_data_all = len(descs)
    n_data = int(data_size * n_data_all)
    if n_data < 1:
        raise ValueError(
            'no descriptors selected: {} ({} * {})'.format(
                n_data, data_size, n_data_all))

    # A permutation prefix samples without replacement, so data_size=1
    # really uses every entry exactly once.
    p = np.random.permutation(n_data_all)[:n_data]
    # Index entry by entry: entries may hold different numbers of
    # descriptors, so they cannot be packed into one rectangular array
    # before being reshaped.
    X = np.vstack([np.array(descs[i]).reshape((-1, 128)) for i in p])
    del descs  # release the raw descriptors before the expensive fit

    print('fitting bag of features extractor')
    bof = BagOfFeatures()
    try:
        bof.fit(X)
    except MemoryError as e:
        print('data_size: {} ({} * {})'.format(n_data, data_size, n_data_all))
        print(e)
        # Do not fall through and save an extractor that was never fitted.
        raise

    print('saving bof')
    with gzip.open(output, 'wb') as f:
        pickle.dump(bof, f)
    print('done')
00041
00042
def cmd_extract_bof_hist(data_path, bof_path, output):
    """Create a normalized BoF histogram dataset and save it.

    Loads the descriptors, targets and target names from the
    gzip-compressed pickle at ``data_path``, loads the extractor pickled at
    ``bof_path``, transforms the descriptors with it, normalizes the result
    with ``sklearn.preprocessing.normalize`` (default settings) and pickles
    a ``Bunch(data, target, target_names)`` (gzip-compressed) to ``output``.

    Args:
        data_path: Path of a gzip-compressed pickle holding a mapping with
            ``'descriptors'``, ``'target'`` and ``'target_names'`` entries.
        bof_path: Path of the gzip-compressed pickled extractor, as written
            by the ``extract_bof`` command.
        output: Path the resulting dataset is written to.
    """
    print('creating dataset')
    # NOTE: unpickling can execute arbitrary code; only load files that
    # come from a trusted source.
    with gzip.open(data_path, 'rb') as f:
        dataset = pickle.load(f)
    descs = dataset['descriptors']
    y = dataset['target']
    target_names = dataset['target_names']
    del dataset  # only the three extracted entries are needed from here on

    print('extracting feature')
    with gzip.open(bof_path, 'rb') as f:
        bof = pickle.load(f)
    X = bof.transform(descs)
    del descs
    # Keep the returned array: copy=False normalizes in place only when no
    # dtype/format conversion is required; otherwise the normalized data
    # exists solely in the return value.
    X = normalize(X, copy=False)
    dataset = Bunch(data=X, target=y, target_names=target_names)

    print('saving dataset')
    with gzip.open(output, 'wb') as f:
        pickle.dump(dataset, f)
    print('done')
00062
00063
def main():
    """Parse the command line and run the selected sub-command.

    Two sub-commands are defined: ``extract_bof`` (handled by
    ``cmd_extract_bof``) and ``extract_bof_hist`` (handled by
    ``cmd_extract_bof_hist``). Any other parsed command value results in
    no action.
    """
    cli = argparse.ArgumentParser()
    commands = cli.add_subparsers(dest='command')

    # extract_bof: fit the extractor on (a fraction of) the descriptors.
    fit_cmd = commands.add_parser(
        'extract_bof',
        help='fit dataset and extract Bag of Features')
    fit_cmd.add_argument('data_path', help='SIFT data path')
    fit_cmd.add_argument(
        '-O', '--output', default='bof.pkl.gz',
        help='bof feature extractor instance save path')
    fit_cmd.add_argument(
        '-s', '--data-size', default=1, type=float,
        help='data_size in 0 to 1')

    # extract_bof_hist: turn descriptors into a histogram dataset.
    hist_cmd = commands.add_parser(
        'extract_bof_hist',
        help='create BoF histogram dataset')
    hist_cmd.add_argument('data_path', help='SIFT data path')
    hist_cmd.add_argument(
        'bof_path',
        help="BoF data path extracted by 'extract_bof' command")
    hist_cmd.add_argument(
        '-O', '--output', default='bof_hist.pkl.gz',
        help='save path of bof histogram (default: bof_hist.pkl.gz)')

    opts = cli.parse_args()

    if opts.command == 'extract_bof':
        cmd_extract_bof(
            data_path=opts.data_path,
            output=opts.output,
            data_size=opts.data_size)
        return
    if opts.command == 'extract_bof_hist':
        cmd_extract_bof_hist(
            data_path=opts.data_path,
            bof_path=opts.bof_path,
            output=opts.output)
00093
00094
# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()