prepare_data.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 import os
3 from pathlib import Path
4 import pickle
5 import sys
6 import argparse
7 import shutil
8 from zipfile import ZipFile
9 
10 
    # Constructor: remembers the training and validation directory lists and
    # the archive name, creates a fresh "data" working directory, and finally
    # builds the zip archive.
    #
    #   train    -- stored unchanged as self.training_paths
    #   valid    -- stored unchanged as self.validation_paths
    #   zip_file -- archive name; ".zip" is appended when it is missing
12  def __init__(self, train, valid, zip_file):
13  self.training_paths = train
14  self.validation_paths = valid
15  self.zip_file = zip_file
16 
    # Normalise the archive name so it always carries the ".zip" suffix.
17  if not zip_file.endswith(".zip"):
18  self.zip_file += ".zip"
19 
    # mkdir() raises FileExistsError when "data" is already present; in that
    # case a message is printed and construction stops before any archiving.
20  try:
21  Path("data").mkdir(parents=True)
22  except FileExistsError as e:
23  print(
24  "'data' directory exist in the working directory already. Can't process images."
25  )
26  return
27 
    # NOTE(review): the listing numbering jumps from 27 to 29, so one statement
    # is not visible here. Presumably it is the call that fills "data" and
    # writes the pickle files (process_data) -- confirm against the real file.
29  self.make_zip_archive()
30 
31  def make_zip_archive(self):
32  zipObj = None
33  try:
34  zipObj = ZipFile(self.zip_file, "x")
35  except FileExistsError as e:
36  print(
37  "Given zip file already exist. Pick different name, or move the zip file."
38  )
39  shutil.rmtree("data")
40  os.remove("partition.pickle")
41  os.remove("labels.pickle")
42  return
43 
44  print("Created %s file" % (self.zip_file))
45 
46  zipObj.write("partition.pickle")
47  print("Zipped 'partition.pickle' file.")
48  zipObj.write("labels.pickle")
49  print("Zipped 'labels.pickle' file.")
50 
51  images = os.listdir("data")
52 
53  for img in images:
54  zipObj.write(os.path.join("data", img))
55  print("Zipped 'data' directory.")
56  zipObj.close()
57 
58  print("Zip archive ready, removing temp files.")
59  shutil.rmtree("data")
60  os.remove("partition.pickle")
61  os.remove("labels.pickle")
62 
63  def process_data(self, paths):
64  labels = {"linear": {}, "angular": {}}
65  partition = {"validation": [], "train": []}
66 
67  for path in paths:
68  print("processing %s/labels.txt" % (path))
69  file = open(os.path.join(path, "labels.txt"), "r")
70  lines = file.readlines()
71 
72  for line in lines:
73  photo, label_tuple = self.get_label(line)
74  labels["linear"][photo] = label_tuple[0]
75  labels["angular"][photo] = label_tuple[1]
76 
77  if path in self.validation_paths:
78  partition["validation"].append(photo)
79  else:
80  partition["train"].append(photo)
81 
82  shutil.copy(os.path.join(path, photo), os.path.join("data", photo))
83  file.close()
84 
85  with open("labels.pickle", "wb") as handle:
86  pickle.dump(labels, handle, pickle.DEFAULT_PROTOCOL)
87 
88  with open("partition.pickle", "wb") as handle:
89  pickle.dump(partition, handle, protocol=pickle.DEFAULT_PROTOCOL)
90 
91  def make_tuple(self, string):
92  divide = string.split(",")
93  first = float(divide[0][1:])
94  second = float(divide[1][:-1])
95  return (first, second)
96 
97  def get_label(self, line):
98  divide = line.split(":")
99  key = divide[0]
100  value = self.make_tuple(divide[1].strip())
101  return key, value
102 
103 
if __name__ == "__main__":
    # Script entry point: read the dataset directories and the archive name
    # from the command line, then hand them to DataProcessor.
    cli = argparse.ArgumentParser(description="Prepare dataset for neural network")

    # Both dataset options are mandatory and accept one or more paths.
    path_list_options = {
        "nargs": "+",
        "type": str,
        "required": True,
        "metavar": "[paths]",
    }
    cli.add_argument(
        "-t",
        "--train_data",
        dest="train",
        help="paths to directiories with training data",
        **path_list_options
    )
    cli.add_argument(
        "-v",
        "--valid_data",
        dest="valid",
        help="paths to directiories with validation data",
        **path_list_options
    )

    # The archive name is optional and falls back to "my_dataset.zip".
    cli.add_argument(
        "-z",
        "--zip_file",
        dest="zip",
        nargs="?",
        type=str,
        metavar="path",
        default="my_dataset.zip",
        help="name of the zip archive with dataset",
    )

    args = cli.parse_args(sys.argv[1:])

    data_processor = DataProcessor(args.train, args.valid, args.zip)
prepare_data.DataProcessor.zip_file
zip_file
Definition: prepare_data.py:15
prepare_data.DataProcessor.__init__
def __init__(self, train, valid, zip_file)
Definition: prepare_data.py:12
prepare_data.DataProcessor.process_data
def process_data(self, paths)
Definition: prepare_data.py:63
prepare_data.DataProcessor.make_tuple
def make_tuple(self, string)
Definition: prepare_data.py:91
prepare_data.DataProcessor.training_paths
training_paths
Definition: prepare_data.py:13
prepare_data.DataProcessor.make_zip_archive
def make_zip_archive(self)
Definition: prepare_data.py:31
prepare_data.DataProcessor.get_label
def get_label(self, line)
Definition: prepare_data.py:97
prepare_data.DataProcessor
Definition: prepare_data.py:11
prepare_data.DataProcessor.validation_paths
validation_paths
Definition: prepare_data.py:14


leo_example_line_follower
Author(s): Aleksander Szymański
autogenerated on Fri Nov 25 2022 03:11:50